Exemplo n.º 1
0
    def __init__(self, edge_params, from_mem, to_mem, from_inst, to_inst):
        """Build the ``lake_edge`` generator for one edge between two signals.

        Reads the edge configuration out of ``edge_params``, then adds a
        ``ForLoop`` child and a write ``AddrGen`` child and wires the address
        generator's ``addr_out`` port to ``self._write_addr``.

        Args:
            edge_params: Mapping that must contain the keys ``"from_signal"``,
                ``"to_signal"``, ``"dim"``, ``"max_range"`` and
                ``"max_stride"``.
            from_mem: Not referenced anywhere in this constructor.
            to_mem: Not referenced anywhere in this constructor.
            from_inst: Not referenced anywhere in this constructor.
            to_inst: Not referenced anywhere in this constructor.

        NOTE(review): this constructor looks unfinished. ``addr_gen_dim`` and
        ``addr_gen_max_range`` are not defined in this method (a ``NameError``
        unless they are module-level names), and ``self._clk``,
        ``self._rst_n`` and ``self._write_addr`` are never assigned here, so
        they must come from the base class or the code will fail - confirm.
        """

        super().__init__(f"lake_edge", debug=True)

        # PARAMETERS
        # data_out
        self.from_signal = edge_params["from_signal"]
        # data_in
        self.to_signal = edge_params["to_signal"]
        self.dim = edge_params["dim"]
        self.max_range = edge_params["max_range"]
        # Stored but not used again in this constructor.
        self.max_stride = edge_params["max_stride"]

        # NOTE(review): ``self._write`` is *called* here but passed as a signal
        # (``step=self._write``) below; this was presumably meant to be an
        # assignment such as ``self._write = self.var(...)`` - confirm.
        self._write(f"write_{self.to_signal}", width=1)

        # Loop controller: one iterator per edge dimension, with a config
        # width large enough to hold ``max_range``.
        forloop = ForLoop(iterator_support=self.dim,
                          config_width=clog2(self.max_range))

        # get memory params from top Lake or make a wrapper func for user
        # with just these params and then pass in mem for this signal
        # self._write_addr(f"write_addr_{to_signal}")

        self.add_child(f"loops_{self.from_signal}_{self.to_signal}",
                       forloop,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write)

        # NOTE(review): ``addr_gen_dim`` / ``addr_gen_max_range`` are undefined
        # in this scope; see the docstring.
        AG_write = AddrGen(iterator_support=addr_gen_dim,
                           config_width=clog2(addr_gen_max_range))

        # The address generator steps with the same signal as the loop
        # controller and is indexed by the loop's ``mux_sel_out``.
        self.add_child(f"AG_write_{self.from_signal}_{self.to_signal}",
                       AG_write,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._write,
                       mux_sel=forloop.ports.mux_sel_out)

        safe_wire(self, AG_write.ports.addr_out, self._write_addr)
Exemplo n.º 2
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_depth=32,
            default_iterator_support=3,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            config_data_width=32,
            config_addr_width=8,
            cycle_count_width=16,
            add_clk_enable=True,
            add_flush=True):
        """Build the ``pond`` generator: a register file plus its controllers.

        The constructor declares the tile's ports and internal signals, adds
        one ForLoop/AddrGen/SchedGen triple per interconnect write port and
        per interconnect read port, instantiates a ``StorageConfigSeq`` for
        configuration access, instantiates the ``RegisterFile`` and finally
        runs the clock-enable / sync-reset passes and lifts the config
        registers.

        Args:
            data_width: Bit width of each data word (``data_in_pond``,
                ``data_out_pond`` and the register file).
            mem_depth: Depth handed to ``RegisterFile``; every address signal
                is ``clog2(mem_depth)`` bits wide.
            default_iterator_support: ``iterator_support`` passed to every
                ForLoop, AddrGen and SchedGen child.
            interconnect_input_ports: Number of ``data_in_pond`` lanes and of
                write-side controller triples.
            interconnect_output_ports: Number of ``data_out_pond`` lanes and
                of read-side controller triples.
            mem_input_ports: ``write_ports`` of the register file.
            mem_output_ports: ``read_ports`` of the register file.
            config_data_width: Width of ``config_data_in`` and of each
                ``config_data_out`` entry.
            config_addr_width: Width of ``config_addr_in``.
            cycle_count_width: Width of the cycle counter; also the config
                width of the ForLoop and SchedGen children.
            add_clk_enable: When true, run
                ``kts.passes.auto_insert_clock_enable`` on this generator.
            add_flush: When true, tag the generator with
                ``sync-reset=flush`` and run
                ``kts.passes.auto_insert_sync_reset``.

        NOTE(review): only index 0 of ``mem_write_addr``, ``mem_data_in``,
        ``mem_read_addr`` and ``mem_data_out`` is driven/used below, so
        ``mem_input_ports``/``mem_output_ports`` greater than 1 looks
        unsupported - confirm.
        """
        super().__init__("pond", debug=True)

        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.mem_input_ports = mem_input_ports
        self.mem_output_ports = mem_output_ports
        self.mem_depth = mem_depth
        self.data_width = data_width
        self.config_data_width = config_data_width
        self.config_addr_width = config_addr_width
        self.add_clk_enable = add_clk_enable
        self.add_flush = add_flush
        self.cycle_count_width = cycle_count_width
        self.default_iterator_support = default_iterator_support
        self.default_config_width = kts.clog2(self.mem_depth)
        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))
        self._clk_en = self.clock_en("clk_en", 1)

        # Enable/Disable tile
        self._tile_en = self.input("tile_en", 1)
        self._tile_en.add_attribute(
            ConfigRegAttr("Tile logic enable manifested as clock gate"))

        # Gated clock: clk AND tile_en. All controllers and the register
        # file below are clocked from this signal.
        gclk = self.var("gclk", 1)
        self._gclk = kts.util.clock(gclk)
        self.wire(gclk, kts.util.clock(self._clk & self._tile_en))

        self._cycle_count = add_counter(self, "cycle_count",
                                        self.cycle_count_width)

        # Create write enable + addr, same for read.
        # self._write = self.input("write", self.interconnect_input_ports)
        self._write = self.var("write", self.mem_input_ports)
        # self._write.add_attribute(ControlSignalAttr(is_control=True))

        self._write_addr = self.var("write_addr",
                                    kts.clog2(self.mem_depth),
                                    size=self.interconnect_input_ports,
                                    explicit_array=True,
                                    packed=True)

        # Add "_pond" suffix to avoid error during garnet RTL generation
        self._data_in = self.input("data_in_pond",
                                   self.data_width,
                                   size=self.interconnect_input_ports,
                                   explicit_array=True,
                                   packed=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_in.add_attribute(ControlSignalAttr(is_control=False))

        # ``read``/``write`` are per-memory-port strobes; ``t_read``/
        # ``t_write`` are the per-interconnect-port strobes produced by the
        # schedule generators.
        self._read = self.var("read", self.mem_output_ports)
        self._t_write = self.var("t_write", self.interconnect_input_ports)
        self._t_read = self.var("t_read", self.interconnect_output_ports)
        # self._read.add_attribute(ControlSignalAttr(is_control=True))

        self._read_addr = self.var("read_addr",
                                   kts.clog2(self.mem_depth),
                                   size=self.interconnect_output_ports,
                                   explicit_array=True,
                                   packed=True)

        # NOTE(review): ``s_read_addr`` is declared but never wired in this
        # constructor.
        self._s_read_addr = self.var("s_read_addr",
                                     kts.clog2(self.mem_depth),
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)

        self._data_out = self.output("data_out_pond",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_out.add_attribute(ControlSignalAttr(is_control=False))

        self._valid_out = self.output("valid_out_pond",
                                      self.interconnect_output_ports)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._valid_out.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_out = self.var("mem_data_out",
                                      self.data_width,
                                      size=self.mem_output_ports,
                                      explicit_array=True,
                                      packed=True)

        # ``s_*`` signals hold the per-interconnect-port value after the
        # config/normal selection; the ``mem_*`` signals are what reaches the
        # register file after decoding on the active strobe.
        self._s_mem_data_in = self.var("s_mem_data_in",
                                       self.data_width,
                                       size=self.interconnect_input_ports,
                                       explicit_array=True,
                                       packed=True)

        self._mem_data_in = self.var("mem_data_in",
                                     self.data_width,
                                     size=self.mem_input_ports,
                                     explicit_array=True,
                                     packed=True)

        self._s_mem_write_addr = self.var("s_mem_write_addr",
                                          kts.clog2(self.mem_depth),
                                          size=self.interconnect_input_ports,
                                          explicit_array=True,
                                          packed=True)

        self._s_mem_read_addr = self.var("s_mem_read_addr",
                                         kts.clog2(self.mem_depth),
                                         size=self.interconnect_output_ports,
                                         explicit_array=True,
                                         packed=True)

        self._mem_write_addr = self.var("mem_write_addr",
                                        kts.clog2(self.mem_depth),
                                        size=self.mem_input_ports,
                                        explicit_array=True,
                                        packed=True)

        self._mem_read_addr = self.var("mem_read_addr",
                                       kts.clog2(self.mem_depth),
                                       size=self.mem_output_ports,
                                       explicit_array=True,
                                       packed=True)

        # Every interconnect output lane mirrors memory read port 0. (The
        # former single-port special case performed exactly the i == 0
        # iteration of this loop, so the loop alone covers it.)
        for i in range(self.interconnect_output_ports):
            self.wire(self._data_out[i], self._mem_data_out[0])

        # Valid out is simply passing the read signal through...
        self.wire(self._valid_out, self._t_read)

        # Create write addressors
        for wr_port in range(self.interconnect_input_ports):

            RF_WRITE_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_WRITE_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_WRITE_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_write_iter_{wr_port}",
                           RF_WRITE_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port])
            # Whatever comes through here should hopefully just pipe through seamlessly
            # addressor modules
            self.add_child(f"rf_write_addr_{wr_port}",
                           RF_WRITE_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port],
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           restart=RF_WRITE_ITER.ports.restart)
            safe_wire(self, self._write_addr[wr_port],
                      RF_WRITE_ADDR.ports.addr_out)

            # The schedule generator produces the write strobe that steps
            # the iterator and address generator above.
            self.add_child(f"rf_write_sched_{wr_port}",
                           RF_WRITE_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           finished=RF_WRITE_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_write[wr_port])

        # Create read addressors
        for rd_port in range(self.interconnect_output_ports):

            RF_READ_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_READ_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_READ_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_read_iter_{rd_port}",
                           RF_READ_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port])

            self.add_child(f"rf_read_addr_{rd_port}",
                           RF_READ_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port],
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           restart=RF_READ_ITER.ports.restart)
            # Both branches of the former port-count check made this exact
            # call, so it is issued unconditionally.
            safe_wire(self, self._read_addr[rd_port],
                      RF_READ_ADDR.ports.addr_out)

            self.add_child(f"rf_read_sched_{rd_port}",
                           RF_READ_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           finished=RF_READ_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_read[rd_port])

        # Reduce the per-interconnect-port strobes to the memory-side strobe
        # and select the matching address/data with ``decode``.
        self.wire(self._write, self._t_write.r_or())
        self.wire(self._mem_write_addr[0],
                  decode(self, self._t_write, self._s_mem_write_addr))

        self.wire(self._mem_data_in[0],
                  decode(self, self._t_write, self._s_mem_data_in))

        self.wire(self._read, self._t_read.r_or())
        self.wire(self._mem_read_addr[0],
                  decode(self, self._t_read, self._s_mem_read_addr))
        # ===================================
        # Instantiate config hooks...
        # ===================================
        self.fw_int = 1
        self.data_words_per_set = 2**self.config_addr_width
        self.sets = int(
            (self.fw_int * self.mem_depth) / self.data_words_per_set)

        # At least one set, even when the memory is smaller than the space
        # addressable through ``config_addr_in``.
        self.sets_per_macro = max(
            1, int(self.mem_depth / self.data_words_per_set))
        self.total_sets = max(1, 1 * self.sets_per_macro)

        self._config_data_in = self.input("config_data_in",
                                          self.config_data_width)
        self._config_data_in.add_attribute(ControlSignalAttr(is_control=False))

        # Only the low ``data_width`` bits of the config data are used.
        self._config_data_in_shrt = self.var("config_data_in_shrt",
                                             self.data_width)

        self.wire(self._config_data_in_shrt,
                  self._config_data_in[self.data_width - 1, 0])

        self._config_addr_in = self.input("config_addr_in",
                                          self.config_addr_width)
        self._config_addr_in.add_attribute(ControlSignalAttr(is_control=False))

        self._config_data_out_shrt = self.var("config_data_out_shrt",
                                              self.data_width,
                                              size=self.total_sets,
                                              explicit_array=True,
                                              packed=True)

        self._config_data_out = self.output("config_data_out",
                                            self.config_data_width,
                                            size=self.total_sets,
                                            explicit_array=True,
                                            packed=True)
        self._config_data_out.add_attribute(
            ControlSignalAttr(is_control=False))

        # Widen each data-width readback word to the config data width.
        for i in range(self.total_sets):
            self.wire(
                self._config_data_out[i],
                self._config_data_out_shrt[i].extend(self.config_data_width))

        self._config_read = self.input("config_read", 1)
        self._config_read.add_attribute(ControlSignalAttr(is_control=False))

        self._config_write = self.input("config_write", 1)
        self._config_write.add_attribute(ControlSignalAttr(is_control=False))

        self._config_en = self.input("config_en", self.total_sets)
        self._config_en.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_cfg = self.var("mem_data_cfg",
                                      self.data_width,
                                      explicit_array=True,
                                      packed=True)

        self._mem_addr_cfg = self.var("mem_addr_cfg",
                                      kts.clog2(self.mem_depth))

        # Add config...
        stg_cfg_seq = StorageConfigSeq(
            data_width=self.data_width,
            config_addr_width=self.config_addr_width,
            addr_width=kts.clog2(self.mem_depth),
            fetch_width=self.data_width,
            total_sets=self.total_sets,
            sets_per_macro=self.sets_per_macro)

        # The clock to config sequencer needs to be the normal clock or
        # if the tile is off, we bring the clock back in based on config_en
        # NOTE(review): as written the sequencer clock is simply ``gclk``;
        # ``config_en`` only contributes through the ``clk_en`` connection
        # below - confirm this matches the intent described above.
        cfg_seq_clk = self.var("cfg_seq_clk", 1)
        self._cfg_seq_clk = kts.util.clock(cfg_seq_clk)
        self.wire(cfg_seq_clk, kts.util.clock(self._gclk))

        self.add_child("config_seq",
                       stg_cfg_seq,
                       clk=self._cfg_seq_clk,
                       rst_n=self._rst_n,
                       clk_en=self._clk_en | self._config_en.r_or(),
                       config_data_in=self._config_data_in_shrt,
                       config_addr_in=self._config_addr_in,
                       config_wr=self._config_write,
                       config_rd=self._config_read,
                       config_en=self._config_en,
                       wr_data=self._mem_data_cfg,
                       rd_data_out=self._config_data_out_shrt,
                       addr_out=self._mem_addr_cfg)

        # Feed the memory read data back to the config sequencer. With a
        # single interconnect output the whole signal is wired; otherwise
        # only element 0 is.
        if self.interconnect_output_ports == 1:
            self.wire(stg_cfg_seq.ports.rd_data_stg, self._mem_data_out)
        else:
            self.wire(stg_cfg_seq.ports.rd_data_stg[0], self._mem_data_out[0])

        self.RF_GEN = RegisterFile(data_width=self.data_width,
                                   write_ports=self.mem_input_ports,
                                   read_ports=self.mem_output_ports,
                                   width_mult=1,
                                   depth=self.mem_depth,
                                   read_delay=0)

        # Now we can instantiate and wire up the register file
        self.add_child("rf",
                       self.RF_GEN,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       data_out=self._mem_data_out)

        # Opt in for config_write
        # While any config_en bit is set, write port 0 follows config_write
        # and every other write port is forced to 0.
        self._write_rf = self.var("write_rf", self.mem_input_ports)
        self.wire(
            self._write_rf[0],
            kts.ternary(self._config_en.r_or(), self._config_write,
                        self._write[0]))
        for i in range(self.mem_input_ports - 1):
            self.wire(
                self._write_rf[i + 1],
                kts.ternary(self._config_en.r_or(), kts.const(0, 1),
                            self._write[i + 1]))
        self.wire(self.RF_GEN.ports.wen, self._write_rf)

        # Opt in for config_data_in
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_data_in[i],
                kts.ternary(self._config_en.r_or(), self._mem_data_cfg,
                            self._data_in[i]))
        self.wire(self.RF_GEN.ports.data_in, self._mem_data_in)

        # Opt in for config_addr
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_write_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._write_addr[i]))

        self.wire(self.RF_GEN.ports.wr_addr, self._mem_write_addr[0])

        for i in range(self.interconnect_output_ports):
            self.wire(
                self._s_mem_read_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._read_addr[i]))

        self.wire(self.RF_GEN.ports.rd_addr, self._mem_read_addr[0])

        if self.add_clk_enable:
            # self.clock_en("clk_en")
            kts.passes.auto_insert_clock_enable(self.internal_generator)
            clk_en_port = self.internal_generator.get_port("clk_en")
            clk_en_port.add_attribute(ControlSignalAttr(False))

        if self.add_flush:
            self.add_attribute("sync-reset=flush")
            kts.passes.auto_insert_sync_reset(self.internal_generator)
            flush_port = self.internal_generator.get_port("flush")
            flush_port.add_attribute(ControlSignalAttr(True))

        # Finally, lift the config regs...
        lift_config_reg(self.internal_generator)
Exemplo n.º 3
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=1,  # Connection to int
            interconnect_output_ports=1,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4):
        """Build the ``tb_formal`` generator.

        Declares the ports and the ``tb`` storage array, then adds one shared
        ForLoop/SchedGen pair that produces the ``read`` strobe (exposed as
        ``valid_in``), and per output port a write AddrGen plus a read
        ForLoop/AddrGen/SchedGen set. Finally registers the ``tb_ctrl`` code
        block and one ``tb_to_out`` code block per output port; those
        methods, like ``increment_cycle_count``, are defined outside this
        constructor.

        Args:
            data_width: Bit width of each data word.
            mem_width: Used only to derive ``fetch_width`` as
                ``mem_width // data_width``.
            mem_depth: Stored on the instance; not otherwise used here.
            banks: Stored on the instance; not otherwise used here.
            input_addr_iterator_support: Stored on the instance only.
            output_addr_iterator_support: Stored on the instance only.
            input_sched_iterator_support: Stored on the instance only.
            output_sched_iterator_support: Stored on the instance only.
            config_width: Stored on the instance only; the children below are
                built with the hard-coded ``default_config_width``.
            interconnect_input_ports: Stored on the instance only.
            interconnect_output_ports: Number of output lanes and of
                per-port controller sets.
            mem_input_ports: Not referenced in this constructor.
            mem_output_ports: Not referenced in this constructor.
            read_delay: Not referenced in this constructor.
            rw_same_cycle: Not referenced in this constructor.
            agg_height: Stored on the instance only.
        """
        super().__init__("tb_formal", debug=True)

        # Number of data words in one memory-wide fetch.
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth
        self.banks = banks
        self.data_width = data_width
        self.config_width = config_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.output_addr_iterator_support = output_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support
        self.output_sched_iterator_support = output_sched_iterator_support

        # NOTE(review): every child below uses these hard-coded values rather
        # than the ``*_iterator_support`` / ``config_width`` arguments.
        self.default_iterator_support = 6
        self.default_config_width = 16

        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))

        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)
        # ``read`` is driven by the write schedule generator below and is
        # exposed unchanged on the ``valid_in`` output.
        self._read = self.var("read", 1)
        self._valid_in = self.output("valid_in", 1)
        self.wire(self._read, self._valid_in)
        self._valid_in.add_attribute(
            FormalAttr(f"{self._valid_in.name}",
                       FormalSignalConstraint.SEQUENCE))

        # One fetch-wide group of ``fetch_width`` words per cycle.
        self._data_in = self.input("data_in",
                                   data_width,
                                   size=self.fetch_width,
                                   packed=True,
                                   explicit_array=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))

        # outputs
        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))

        self._tb_read = self.var("tb_read", self.interconnect_output_ports)
        # Break out valids for formal!
        self._valid_out = self.output("valid_out",
                                      self.interconnect_output_ports)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self.wire(self._valid_out, self._tb_read)

        # Hard-coded height of the ``tb`` array (independent of
        # ``agg_height``).
        self.tb_height = 4

        # NOTE(review): the 6-bit address width is hard-coded - confirm it
        # is wide enough for tb_height * fetch_width in all configurations.
        self._tb_write_addr = self.var("tb_write_addr",
                                       6,
                                       size=self.interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)
        self._tb_read_addr = self.var("tb_read_addr",
                                      6,
                                      size=self.interconnect_output_ports,
                                      packed=True,
                                      explicit_array=True)

        # Storage array indexed as [output port][row][word in fetch].
        self._tb = self.var("tb",
                            width=data_width,
                            size=(self.interconnect_output_ports,
                                  self.tb_height, self.fetch_width),
                            packed=True,
                            explicit_array=True)

        # Selects which output port's write address generator steps; at
        # least one bit wide.
        self._output_port_sel_addr = self.var(
            "tb_bank_sel_addr", max(1, clog2(self.interconnect_output_ports)))

        # -------------------------------- Delineate new group -------------------------------
        fl_ctr_sram_rd = ForLoop(
            iterator_support=self.default_iterator_support,
            config_width=self.default_config_width)
        # NOTE(review): ``loop_itr`` / ``loop_wth`` are assigned here and
        # again in the loop below but never read in this constructor.
        loop_itr = fl_ctr_sram_rd.get_iter()
        loop_wth = fl_ctr_sram_rd.get_cfg_width()

        self.add_child(f"tb_write_loops",
                       fl_ctr_sram_rd,
                       clk=self._clk,
                       rst_n=self._rst_n,
                       step=self._read)

        # Schedule generator that produces the ``read`` strobe stepping the
        # shared write loop above.
        self.add_child(f"tb_write_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                       finished=fl_ctr_sram_rd.ports.restart,
                       valid_output=self._read)

        for i in range(self.interconnect_output_ports):
            # fl_ctr_tb_wr = ForLoop(iterator_support=self.default_iterator_support,
            #                        config_width=self.default_config_width)
            # loop_itr = fl_ctr_tb_wr.get_iter()
            # loop_wth = fl_ctr_tb_wr.get_cfg_width()

            # self.add_child(f"tb_write_loops_{i}",
            #                fl_ctr_tb_wr,
            #                clk=self._clk,
            #                rst_n=self._rst_n,
            #                step=self._read & (self._output_port_sel_addr ==
            #                                   const(i, self._output_port_sel_addr.width)))

            # Write address generator for port ``i``: shares the write loop
            # and steps only when ``read`` is high and the port selector
            # equals ``i``.
            newAG = AddrGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width)

            self.add_child(
                f"tb_write_addr_gen_{i}",
                newAG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._read & (self._output_port_sel_addr == const(
                    i, self._output_port_sel_addr.width)),
                # addr_out=self._tb_write_addr[i])
                mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                restart=fl_ctr_sram_rd.ports.restart)

            safe_wire(self, self._tb_write_addr[i], newAG.ports.addr_out)

            # Read side for port ``i``: its own loop, address generator and
            # schedule generator, all stepped by ``tb_read[i]``.
            fl_ctr_tb_rd = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            loop_itr = fl_ctr_tb_rd.get_iter()
            loop_wth = fl_ctr_tb_rd.get_cfg_width()

            self.add_child(f"tb_read_loops_{i}",
                           fl_ctr_tb_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i])

            # ``newAG`` is rebound; the write generator above has already
            # been added and wired.
            newAG = AddrGen(iterator_support=self.default_iterator_support,
                            config_width=self.default_config_width)

            self.add_child(
                f"tb_read_addr_gen_{i}",
                newAG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._tb_read[i],
                # addr_out=self._tb_read_addr[i])
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                restart=fl_ctr_tb_rd.ports.restart)

            safe_wire(self, self._tb_read_addr[i], newAG.ports.addr_out)

            self.add_child(f"tb_read_sched_gen_{i}",
                           SchedGen(
                               iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           cycle_count=self._cycle_count,
                           mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                           finished=fl_ctr_tb_rd.ports.restart,
                           valid_output=self._tb_read[i])

        if self.interconnect_output_ports > 1:

            # fl_ctr_out_sel = ForLoop(iterator_support=self.default_iterator_support,
            #                          # config_width=clog2(self.interconnect_output_ports))
            #                          config_width=self.default_config_width)
            # loop_itr = fl_ctr_out_sel.get_iter()
            # loop_wth = fl_ctr_out_sel.get_cfg_width()

            # self.add_child(f"tb_sel_loops",
            #                fl_ctr_out_sel,
            #                clk=self._clk,
            #                rst_n=self._rst_n,
            #                step=self._read)

            # With several output ports the selector comes from its own
            # address generator driven by the shared write loop.
            # NOTE(review): unlike the other AddrGen children, no ``restart``
            # connection is made here - confirm this is intentional.
            self.add_child(f"out_port_sel_addr",
                           AddrGen(
                               iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._read,
                           mux_sel=fl_ctr_sram_rd.ports.mux_sel_out,
                           addr_out=self._output_port_sel_addr)
            # Addr for port select should be driven on agg to sram write sched
        else:
            # Single output port: tie bit 0 of the selector to constant zero.
            self.wire(self._output_port_sel_addr[0],
                      const(0, self._output_port_sel_addr.width))

        # Register the behavioural code blocks: one shared controller and one
        # output block per port (``idx`` is passed through to ``tb_to_out``).
        self.add_code(self.tb_ctrl)
        for idx in range(self.interconnect_output_ports):
            self.add_code(self.tb_to_out, idx=idx)
Exemplo n.º 4
0
    def __init__(self, mem_params, word_width):
        """Build the ``lake_mem`` generator for a single memory.

        Args:
            mem_params: Mapping describing the memory. This constructor reads
                ``name``, ``capacity``, ``rw_same_cycle``, ``use_macro``,
                ``macro_name``, ``num_read_write_ports``, ``num_read_ports``,
                ``num_write_ports``, ``write_info``, ``read_info``,
                ``read_write_info``, ``chaining`` and ``num_chain``. It also
                reads ``write_port_width`` / ``read_port_width`` when there
                are no read/write ports, and ``read_write_port_width``
                otherwise (and always on the macro path).
            word_width: Value used as ``width`` for the data ports and for
                the behavioural memory array.

        Raises:
            AssertionError: If ``capacity`` is not a multiple of the write or
                read port width, or (when both write-only and read-only ports
                exist) if the first write port's latency is not positive.
            ValueError: If the wider port width equals neither the read nor
                the write width. This cannot happen while ``mem_size`` is the
                max of the two; the check guards future edits.
        """

        super().__init__("lake_mem", debug=True)

        ################################################################
        # PARAMETERS
        ################################################################

        # basic parameters
        self.word_width = word_width

        # general memory parameters
        self.mem_name = mem_params["name"]
        self.capacity = mem_params["capacity"]
        self.rw_same_cycle = mem_params["rw_same_cycle"]
        self.use_macro = mem_params["use_macro"]
        self.macro_name = mem_params["macro_name"]

        # number of port types
        self.num_read_write_ports = mem_params["num_read_write_ports"]
        self.num_read_only_ports = mem_params["num_read_ports"]
        self.num_write_only_ports = mem_params["num_write_ports"]
        self.num_read_ports = self.num_read_only_ports + self.num_read_write_ports
        self.num_write_ports = self.num_write_only_ports + self.num_read_write_ports

        # info for port types
        self.write_info = mem_params["write_info"]
        self.read_info = mem_params["read_info"]
        self.read_write_info = mem_params["read_write_info"]

        # TODO change - for now, we assume you cannot have read/write and read or write ports
        # should be the max of write vs read_write and need to handle more general case
        if self.num_read_write_ports == 0:
            self.write_width = mem_params["write_port_width"]
            self.read_width = mem_params["read_port_width"]
        else:
            self.write_width = mem_params["read_write_port_width"]
            self.read_width = mem_params["read_write_port_width"]

        assert self.capacity % self.write_width == 0, \
            "Memory capacity is not a multiple of the port width for writes"
        assert self.capacity % self.read_width == 0, \
            "Memory capacity is not a multiple of the port width for reads"

        # innermost dimension for size of memory is the size of whichever port
        # type has a wider width between reads and writes
        self.mem_size = max(self.read_width, self.write_width)
        # this assert has to be true if previous two asserts are true
        assert self.capacity % self.mem_size == 0
        # this is the last dimension for size of memory - equal to the number
        # of the port type with wider width addresses can fit in the memory.
        # Floor division keeps the arithmetic in integers (the assert above
        # guarantees there is no remainder).
        self.mem_last_dim = int(self.capacity // self.mem_size)

        # at least one address bit per dimension, even for a size of 1
        self.mem_size_bits = max(1, clog2(self.mem_size))
        self.mem_last_dim_bits = max(1, clog2(self.mem_last_dim))

        # chaining parameters and config regs
        self.chaining = mem_params["chaining"]
        self.num_chain = mem_params["num_chain"]
        self.num_chain_bits = clog2(self.num_chain)
        if self.chaining:
            self.chain_index = self.var("chain_index",
                                        width=self.num_chain_bits)
            self.chain_index.add_attribute(
                ConfigRegAttr("Chain index for chaining"))
            self.chain_index.add_attribute(
                FormalAttr(self.chain_index.name, FormalSignalConstraint.SET0))

        # minimum required widths for address signals: the port whose width
        # equals mem_size only addresses the last dimension, the narrower
        # port additionally needs mem_size_bits
        if self.mem_size == self.write_width and self.mem_size == self.read_width:
            self.write_addr_width = self.mem_last_dim_bits + self.num_chain_bits
            self.read_addr_width = self.mem_last_dim_bits + self.num_chain_bits
        elif self.mem_size == self.write_width:
            self.write_addr_width = self.mem_last_dim_bits + self.num_chain_bits
            self.read_addr_width = self.mem_size_bits + self.mem_last_dim_bits + self.num_chain_bits
        elif self.mem_size == self.read_width:
            self.write_addr_width = self.mem_size_bits + self.mem_last_dim_bits + self.num_chain_bits
            self.read_addr_width = self.mem_last_dim_bits + self.num_chain_bits
        else:
            # Not reachable while mem_size is max(read_width, write_width);
            # fail loudly rather than continue without address widths.
            raise ValueError(
                "Error occurred! Memory size does not make sense.")

        ################################################################
        # I/O INTERFACE (WITHOUT ADDRESSING) + MEMORY
        ################################################################
        self.clk = self.clock("clk")
        # active low asynchronous reset
        self.rst_n = self.reset("rst_n", 1)

        self.data_in = self.input("data_in",
                                  width=self.word_width,
                                  size=(self.num_write_ports,
                                        self.write_width),
                                  explicit_array=True,
                                  packed=True)

        self.chain_en = self.input("chain_en", 1)

        # write enable (high: write, low: read when rw_same_cycle = False, else
        # only indicates write)
        self.write = self.input("write", width=1, size=self.num_write_ports)

        self.data_out = self.output("data_out",
                                    width=self.word_width,
                                    size=(self.num_read_ports,
                                          self.read_width),
                                    explicit_array=True,
                                    packed=True)

        self.write_chain = self.var("write_chain",
                                    width=1,
                                    size=self.num_write_ports)

        if self.use_macro:

            # NOTE(review): self.addr_width is not assigned anywhere in this
            # constructor; unless the class provides it elsewhere, this
            # branch raises AttributeError - confirm the intended width.
            self.read_write_addr = self.input("read_write_addr",
                                              width=self.addr_width,
                                              size=self.num_read_write_ports,
                                              explicit_array=True)

            sram = SRAM(not self.use_macro, self.macro_name, word_width,
                        mem_params["read_write_port_width"],
                        mem_params["capacity"],
                        mem_params["num_read_write_ports"],
                        mem_params["num_read_write_ports"],
                        clog2(mem_params["capacity"]), 0, 1)

            # NOTE(review): write_chain is connected to the macro here, but
            # nothing in this branch drives it - confirm.
            self.add_child(
                "SRAM_" + mem_params["name"],
                sram,
                clk=self.clk,
                clk_en=1,
                mem_data_in_bank=self.data_in,
                mem_data_out_bank=self.data_out,
                mem_addr_in_bank=self.read_write_addr,
                # TODO adjust
                mem_cen_in_bank=1,
                mem_wen_in_bank=self.write_chain,
                wtsel=0,
                rtsel=1)

        else:
            # memory variable (not I/O)
            self.memory = self.var("memory",
                                   width=self.word_width,
                                   size=(self.mem_last_dim, self.mem_size),
                                   explicit_array=True,
                                   packed=True)

            ################################################################
            # ADDRESSING I/O AND SIGNALS
            ################################################################

            # I/O is different depending on whether we have read and write ports or
            # read/write ports

            # we keep address width at 16 to avoid unpacked
            # safe_wire errors for addr in hw_top_lake - can change by changing
            # default_config_width for those addr gens while accounting for muxing
            # bits, but the extra bits are unused anyway

            if self.rw_same_cycle:
                self.read = self.input("read",
                                       width=1,
                                       size=self.num_read_ports)
            else:
                # without same-cycle read/write, read is an internal signal
                # tied high on every read port
                self.read = self.var("read", width=1, size=self.num_read_ports)
                for i in range(self.num_read_ports):
                    self.wire(self.read[i], 1)

            # TODO change later - same read/write or read and write assumption as above
            if self.num_write_only_ports != 0 and self.num_read_only_ports != 0:
                # writes
                self.write_addr = self.input(
                    "write_addr",
                    width=16,  # self.write_addr_width,
                    size=self.num_write_ports,
                    explicit_array=True)

                # the message mirrors the actual check (latency > 0)
                assert self.write_info[0]["latency"] > 0, \
                    "Latency for write ports must be at least 1 clock cycle."

                # reads
                self.read_addr = self.input(
                    "read_addr",
                    width=16,  # self.read_addr_width,
                    size=self.num_read_ports,
                    explicit_array=True)

                # TODO for now assuming all read ports have same latency
                # TODO also should add support for other latencies

            # rw_same_cycle is not valid here because read/write share the same port
            elif self.num_read_write_ports != 0:
                self.read_write_addr = self.input(
                    "read_write_addr",
                    width=16,  # max(self.read_addr_width, self.write_addr_width),
                    size=self.num_read_write_ports,
                    explicit_array=True)

                # writes: internal copy of the shared address
                self.write_addr = self.var(
                    "write_addr",
                    width=16,  # self.write_addr_width,
                    size=self.num_read_write_ports,
                    explicit_array=True)

                for p in range(self.num_read_write_ports):
                    safe_wire(gen=self,
                              w_to=self.write_addr[p],
                              w_from=self.read_write_addr[p])

                # reads: internal copy of the shared address
                self.read_addr = self.var("read_addr",
                                          width=self.read_addr_width,
                                          size=self.num_read_write_ports,
                                          explicit_array=True)
                for p in range(self.num_read_write_ports):
                    safe_wire(gen=self,
                              w_to=self.read_addr[p],
                              w_from=self.read_write_addr[p])

                # TODO in self.read_write_info we should allow for different read
                # and write latencies?
                self.read_info = self.read_write_info

            # TODO just doing chaining for SRAM
            if self.chaining and self.num_read_write_ports > 0:
                # NOTE(review): read_write_addr is declared with
                # size=num_read_write_ports, so this [hi, lo] index may select
                # array entries rather than address bits - confirm.
                self.wire(
                    self.write_chain,
                    # chaining not enabled
                    (
                        ~self.chain_en |
                        # chaining enabled
                        (self.chain_en &
                         (self.chain_index == self.read_write_addr[
                             self.write_addr_width + self.num_chain_bits,
                             self.write_addr_width]))) & self.write)
            # chaining not supported
            else:
                self.wire(self.write_chain, self.write)

            self.add_write_data_block()
            self.add_read_data_block()
Exemplo n.º 5
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):
        """Create the ``strg_ub_agg_sram_shared`` generator.

        For every interconnect input port the constructor instantiates a
        ``ForLoop`` child stepped by that port's ``agg_read`` bit and a
        ``SchedGen`` child whose ``valid_output`` is connected to the same
        bit. The loop's ``mux_sel_out`` and ``restart`` are exposed on the
        ``floop_mux_sel`` / ``floop_restart`` outputs, and ``agg_read`` is
        mirrored on ``agg_read_out``.

        ``banks``, ``output_addr_iterator_support``,
        ``output_sched_iterator_support``, ``mem_input_ports``,
        ``mem_output_ports``, ``read_delay`` and ``rw_same_cycle`` are
        accepted but not used by this constructor.
        """

        super().__init__("strg_ub_agg_sram_shared")

        # ---------------------------------------------------------------
        # Constructor parameters kept on the instance
        # ---------------------------------------------------------------
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        # Fixed settings used for the children created below
        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        num_in_ports = self.interconnect_input_ports

        # ---------------------------------------------------------------
        # IO
        # ---------------------------------------------------------------
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        # the mux select is never narrower than one bit
        mux_sel_width = max(clog2(self.default_iterator_support), 1)
        self._floop_mux_sel = self.output("floop_mux_sel",
                                          width=mux_sel_width,
                                          size=num_in_ports,
                                          explicit_array=True,
                                          packed=True)

        self._floop_restart = self.output("floop_restart",
                                          width=1,
                                          size=num_in_ports,
                                          explicit_array=True,
                                          packed=True)

        # The SRAM write is just the OR reduction of the aggregator reads
        self._agg_read_out = self.output("agg_read_out", num_in_ports)
        self._agg_read = self.var("agg_read", num_in_ports)

        self.wire(self._agg_read_out, self._agg_read)

        # ---------------------------------------------------------------
        # AGG PATHS
        # ---------------------------------------------------------------
        for port in range(num_in_ports):

            # (re)assigned on every iteration, exactly as before
            self.agg_iter_support = 6
            self.agg_addr_width = 4
            self.agg_range_width = 16

            # Loop counter shared by input port selection and SRAM write
            loop_ctr = ForLoop(iterator_support=self.default_iterator_support,
                               config_width=self.default_config_width)

            self.add_child(f"loops_in2buf_autovec_write_{port}",
                           loop_ctr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_read[port])

            # expose the loop state on this port's outputs
            safe_wire(gen=self,
                      w_to=self._floop_mux_sel[port],
                      w_from=loop_ctr.ports.mux_sel_out)
            self.wire(self._floop_restart[port], loop_ctr.ports.restart)

            # scheduler whose valid output becomes this port's agg_read bit
            sched_gen = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=16)

            self.add_child(f"agg_read_sched_gen_{port}",
                           sched_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           cycle_count=self._cycle_count,
                           mux_sel=loop_ctr.ports.mux_sel_out,
                           finished=loop_ctr.ports.restart,
                           valid_output=self._agg_read[port])
Exemplo n.º 6
0
    def __init__(self,
                 data_width=16,  # CGRA Params
                 mem_width=64,
                 mem_depth=512,
                 banks=1,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6,
                 config_width=16,
                 #  output_config_width=16,
                 interconnect_input_ports=1,  # Connection to int
                 interconnect_output_ports=1,
                 mem_input_ports=1,
                 mem_output_ports=1,
                 read_delay=1,  # Cycle delay in read (SRAM vs Register File)
                 rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
                 agg_height=4):
        """Create the ``agg_formal`` generator.

        For each interconnect input port this instantiates a write-side
        ``ForLoop`` / ``AddrGen`` / ``SchedGen`` trio stepped by that port's
        ``agg_write`` bit, and a read-side ``ForLoop`` / ``AddrGen`` pair
        stepped by ``write``. With more than one input port an additional
        ``ForLoop`` / ``AddrGen`` pair drives ``input_port_sel_addr``;
        otherwise that signal is tied to zero. A final ``SchedGen`` produces
        ``write`` from whichever loop counter was created last. The
        ``increment_cycle_count``, ``agg_ctrl`` (once per input port) and
        ``agg_to_sram`` code blocks are registered on the generator.

        Only ``data_width``, ``mem_width``, ``mem_depth``,
        ``interconnect_input_ports``, ``interconnect_output_ports`` and
        ``agg_height`` are used; the remaining parameters are accepted but
        ignored by this constructor.
        """
        super().__init__("agg_formal", debug=True)

        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_depth = mem_depth

        self.default_iterator_support = 6
        self.default_config_width = 16

        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(FormalAttr(self._clk.name, FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(FormalAttr(self._rst_n.name, FormalSignalConstraint.RSTN))

        # the cycle counter is internal here, advanced by increment_cycle_count
        self._cycle_count = self.var("cycle_count", 16)
        self.add_code(self.increment_cycle_count)

        self._data_in = self.input("data_in", data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)
        self._data_in.add_attribute(FormalAttr(self._data_in.name, FormalSignalConstraint.SEQUENCE))

        # per-port write strobes, mirrored on the valid_in output
        self._agg_write = self.var("agg_write", self.interconnect_input_ports)
        self._valid_in = self.output("valid_in", self.interconnect_input_ports)
        self._valid_in.add_attribute(FormalAttr(self._valid_in.name, FormalSignalConstraint.SEQUENCE))
        self.wire(self._valid_in, self._agg_write)

        # single write strobe, tied to the valid_out output
        self._write = self.var("write", 1)
        self._valid_out = self.output("valid_out", 1)
        self._valid_out.add_attribute(FormalAttr(self._valid_out.name, FormalSignalConstraint.SEQUENCE))
        self.wire(self._write, self._valid_out)

        self._data_out = self.output("data_out", data_width,
                                     size=self.fetch_width,
                                     packed=True)
        self._data_out.add_attribute(FormalAttr(self._data_out.name, FormalSignalConstraint.SEQUENCE))

        # Make this based on the size
        self._agg_write_addr = self.var("agg_write_addr", 2 + clog2(self.agg_height),
                                        size=self.interconnect_input_ports,
                                        packed=True,
                                        explicit_array=True)
        self._agg_read_addr = self.var("agg_read_addr", max(1, clog2(self.agg_height)),
                                       size=self.interconnect_input_ports,
                                       packed=True,
                                       explicit_array=True)

        self.agg_rd_addr_gen_width = 8
        self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out", self.agg_rd_addr_gen_width,
                                               size=self.interconnect_input_ports,
                                               packed=True,
                                               explicit_array=True)
        self._input_port_sel_addr = self.var("input_port_sel_addr",
                                             max(1, clog2(self.interconnect_input_ports)))
        # Create an input to agg write scheduler + addressor for each input
        # Also need an addressor for the mux in addition to the read addr
        self._agg = self.var("agg",
                             width=data_width,
                             size=(self.interconnect_input_ports,
                                   self.agg_height,
                                   self.fetch_width),
                             packed=True,
                             explicit_array=True)

        # loop counter feeding the output scheduler; set below
        output_loops = None

        for i in range(self.interconnect_input_ports):

            # ---- write side: loops, address generator, scheduler ----
            forloop_ctr = ForLoop(iterator_support=self.default_iterator_support,
                                  config_width=self.default_config_width)

            self.add_child(f"agg_write_loops_{i}",
                           forloop_ctr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i])

            write_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                                     config_width=self.default_config_width)
            self.add_child(f"agg_write_addr_gen_{i}",
                           write_addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i],
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           restart=forloop_ctr.ports.restart)
            safe_wire(self, self._agg_write_addr[i], write_addr_gen.ports.addr_out)

            write_sched_gen = SchedGen(iterator_support=self.default_iterator_support,
                                       config_width=self.default_config_width)
            self.add_child(f"agg_write_sched_gen_{i}",
                           write_sched_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           finished=forloop_ctr.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._agg_write[i])

            # ---- read side: loops and address generator ----
            forloop_ctr_rd = ForLoop(iterator_support=self.default_iterator_support,
                                     config_width=self.default_config_width)

            # Add loops for the output of each agg...
            self.add_child(f"agg_read_loops_{i}",
                           forloop_ctr_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write)

            output_loops = forloop_ctr_rd

            # And an associated read address...
            read_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                                    config_width=self.default_config_width)
            self.add_child(f"agg_read_addr_gen_{i}",
                           read_addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write,
                           mux_sel=forloop_ctr_rd.ports.mux_sel_out,
                           restart=forloop_ctr_rd.ports.restart)

            safe_wire(self, self._agg_read_addr_gen_out[i], read_addr_gen.ports.addr_out)
            # only the low bits of the generated address index the aggregator
            self.wire(self._agg_read_addr[i], self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

        # Now we determine what data goes through to the sram...
        # If we have more than one port, we can generate a selector
        # to pick which input port should go through - then we send
        # the step signal to the appropriate input port
        if self.interconnect_input_ports > 1:

            # Create for loop counters that can be shared across the input port selection and SRAM write
            fl_ctr_sram_wr = ForLoop(iterator_support=self.default_iterator_support,
                                     config_width=self.default_config_width)

            # the selector loops replace the per-port read loops as the
            # source for the output scheduler
            output_loops = fl_ctr_sram_wr

            self.add_child("agg_select_loops",
                           fl_ctr_sram_wr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write)

            port_sel_addr_gen = AddrGen(iterator_support=self.default_iterator_support,
                                        config_width=self.default_config_width)
            self.add_child("port_sel_addr",
                           port_sel_addr_gen,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._write,
                           mux_sel=fl_ctr_sram_wr.ports.mux_sel_out)
            safe_wire(self, self._input_port_sel_addr, port_sel_addr_gen.ports.addr_out)

        else:
            self.wire(self._input_port_sel_addr[0], const(0, self._input_port_sel_addr.width))

        # Addr for port select should be driven on agg to sram write sched
        # scheduler modules
        self.add_child("agg_read_output_sched_gen",
                       SchedGen(iterator_support=self.default_iterator_support,
                                config_width=self.default_config_width),
                       clk=self._clk,
                       rst_n=self._rst_n,
                       cycle_count=self._cycle_count,
                       mux_sel=output_loops.ports.mux_sel_out,
                       finished=output_loops.ports.restart,
                       valid_output=self._write)

        for idx in range(self.interconnect_input_ports):
            self.add_code(self.agg_ctrl, idx=idx)
        self.add_code(self.agg_to_sram)
Exemplo n.º 7
0
    def __init__(self,
                 word_width,
                 input_ports,
                 output_ports,
                 memories,
                 edges):

        super().__init__("LakeTop", debug=True)

        # parameters
        self.word_width = word_width
        self.input_ports = input_ports
        self.output_ports = output_ports

        self.default_config_width = 16
        self.cycle_count_width = 16

        self.stencil_valid = False

        # objects
        self.memories = memories
        self.edges = edges

        # tile enable and clock
        self.tile_en = self.input("tile_en", 1)
        self.tile_en.add_attribute(ConfigRegAttr("Tile logic enable manifested as clock gate"))
        self.tile_en.add_attribute(FormalAttr(self.tile_en.name, FormalSignalConstraint.SET1))

        self.clk_mem = self.clock("clk")
        self.clk_mem.add_attribute(FormalAttr(self.clk_mem.name, FormalSignalConstraint.CLK))

        # chaining
        chain_supported = False
        for mem in self.memories.keys():
            if self.memories[mem]["chaining"]:
                chain_supported = True
                break

        if chain_supported:
            self.chain_en = self.input("chain_en", 1)
            self.chain_en.add_attribute(ConfigRegAttr("Chaining enable"))
            self.chain_en.add_attribute(FormalAttr(self.chain_en.name, FormalSignalConstraint.SET0))
        else:
            self.chain_en = self.var("chain_en", 1)
            self.wire(self.chain_en, 0)

        # gate clock with tile_en
        gclk = self.var("gclk", 1)
        self.gclk = kts.util.clock(gclk)
        self.wire(gclk, self.clk_mem & self.tile_en)

        self.clk_en = self.clock_en("clk_en", 1)

        # active low asynchronous reset
        self.rst_n = self.reset("rst_n", 1)
        self.rst_n.add_attribute(FormalAttr(self.rst_n.name, FormalSignalConstraint.RSTN))

        # data in and out of top level Lake memory object
        self.data_in = self.input("data_in",
                                  width=self.word_width,
                                  size=self.input_ports,
                                  explicit_array=True,
                                  packed=True)
        self.data_in.add_attribute(FormalAttr(self.data_in.name, FormalSignalConstraint.SEQUENCE))

        self.data_out = self.output("data_out",
                                    width=self.word_width,
                                    size=self.output_ports,
                                    explicit_array=True,
                                    packed=True)
        self.data_out.add_attribute(FormalAttr(self.data_out.name, FormalSignalConstraint.SEQUENCE))

        # global cycle count for accessor comparison
        self._cycle_count = self.var("cycle_count", 16)

        @always_ff((posedge, self.gclk), (negedge, "rst_n"))
        def increment_cycle_count(self):
            if ~self.rst_n:
                self._cycle_count = 0
            else:
                self._cycle_count = self._cycle_count + 1

        self.add_always(increment_cycle_count)

        # info about memories
        num_mem = len(memories)
        subscript_mems = list(self.memories.keys())

        # list of the data out from each memory
        self.mem_data_outs = [self.var(f"mem_data_out_{subscript_mems[i]}",
                                       width=self.word_width,
                                       size=self.memories[subscript_mems[i]]
                                       ["read_port_width" if "read_port_width" in self.memories[subscript_mems[i]]
                                        else "read_write_port_width"],
                                       explicit_array=True, packed=True) for i in range(num_mem)]

        # keep track of write, read_addr, and write_addr vars for read/write memories
        # to later check whether there is a write and what to use for the shared port
        self.mem_read_write_addrs = {}

        # create memory instance for each memory
        self.mem_insts = {}
        i = 0
        for mem in self.memories.keys():
            m = mem_inst(self.memories[mem], self.word_width)
            self.mem_insts[mem] = m

            self.add_child(mem,
                           m,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           # put data out in memory data out list
                           data_out=self.mem_data_outs[i],
                           chain_en=self.chain_en)
            i += 1

        # get input and output memories
        is_input, is_output = [], []
        for mem_name in self.memories.keys():
            mem = self.memories[mem_name]
            if mem["is_input"]:
                is_input.append(mem_name)
            if mem["is_output"]:
                is_output.append(mem_name)

        # TODO direct connection to write doesn't work (?), so have to do this...
        self.low = self.var("low", 1)
        self.wire(self.low, 0)

        # TODO adding multiple ports to 1 memory after talking about mux with compiler team

        # set up input memories
        for i in range(len(is_input)):
            in_mem = is_input[i]

            # input addressor / accessor parameters
            input_dim = self.memories[in_mem]["input_edge_params"]["dim"]
            input_range = self.memories[in_mem]["input_edge_params"]["max_range"]
            input_stride = self.memories[in_mem]["input_edge_params"]["max_stride"]
            # input port associated with memory
            input_port_index = self.memories[in_mem]["input_port"]

            self.valid = self.var(
                f"input_port{input_port_index}_2{in_mem}_accessor_valid", 1)
            self.wire(self.mem_insts[in_mem].ports.write, self.valid)

            # hook up data from the specified input port to the memory
            safe_wire(self, self.mem_insts[in_mem].ports.data_in[0],
                      self.data_in[input_port_index])

            if self.memories[in_mem]["num_read_write_ports"] > 0:
                self.mem_read_write_addrs[in_mem] = {"write": self.valid}

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for writes to this input memory
            forloop = ForLoop(iterator_support=input_dim,
                              config_width=max(1, clog2(input_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"input_port{input_port_index}_2{in_mem}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=input_dim,
                            config_width=max(1, clog2(input_stride)))  # self.default_config_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            if self.memories[in_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[in_mem].ports.write_addr[0], newAG.ports.addr_out)
            else:
                self.mem_read_write_addrs[in_mem]["write_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=input_dim,
                             config_width=self.cycle_count_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # set up output memories
        for i in range(len(is_output)):
            out_mem = is_output[i]

            # output addressor / accessor parameters
            output_dim = self.memories[out_mem]["output_edge_params"]["dim"]
            output_range = self.memories[out_mem]["output_edge_params"]["max_range"]
            output_stride = self.memories[out_mem]["output_edge_params"]["max_stride"]
            # output port associated with memory
            output_port_index = self.memories[out_mem]["output_port"]

            # hook up data from the memory to the specified output port
            self.wire(self.data_out[output_port_index],
                      self.mem_insts[out_mem].ports.data_out[0][0])
            # self.mem_data_outs[subscript_mems.index(out_mem)][0])

            self.valid = self.var(f"{out_mem}2output_port{output_port_index}_accessor_valid", 1)
            if self.memories[out_mem]["rw_same_cycle"]:
                self.wire(self.mem_insts[out_mem].ports.read, self.valid)

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for reads from this output memory
            forloop = ForLoop(iterator_support=output_dim,
                              config_width=max(1, clog2(output_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"{out_mem}2output_port{output_port_index}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=output_dim,
                            config_width=max(1, clog2(output_stride)))  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            if self.memories[out_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[out_mem].ports.read_addr[0], newAG.ports.addr_out)
            else:
                self.mem_read_write_addrs[in_mem]["read_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=output_dim,
                             config_width=self.cycle_count_width)  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # create shared IteratorDomains and accessors as well as
        # read/write addressors for memories connected by each edge
        for edge in self.edges:

            # see how many signals need to be selected between for
            # from and to signals for edge
            num_mux_from = len(edge["from_signal"])
            num_mux_to = len(edge["to_signal"])

            # get unique edge_name identifier for hardware modules
            edge_name = get_edge_name(edge)

            # create forloop and accessor valid output signal
            self.valid = self.var(edge_name + "_accessor_valid", 1)

            forloop = ForLoop(iterator_support=edge["dim"])
            self.forloop = forloop
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(edge_name + "_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            # create input addressor
            readAG = AddrGen(iterator_support=edge["dim"],
                             config_width=self.default_config_width)
            self.add_child(f"{edge_name}_read_addr_gen",
                           readAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            # assign read address to all from memories
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                # can assign same read addrs to all the memories
                for i in range(len(edge["from_signal"])):
                    safe_wire(self, self.mem_insts[edge["from_signal"][i]].ports.read_addr[0], readAG.ports.addr_out)
            else:
                for i in range(len(edge["from_signal"])):
                    self.mem_read_write_addrs[edge["from_signal"][i]]["read_addr"] = readAG.ports.addr_out

            # if needing to mux, choose which from memory we get data
            # from for to memory data in
            if num_mux_from > 1:
                num_mux_bits = clog2(num_mux_from)
                self.mux_sel = self.var(f"{edge_name}_mux_sel",
                                        width=num_mux_bits)

                read_addr_width = max(1, clog2(self.memories[edge["from_signal"][0]]["capacity"]))
                # decide which memory to get data from for to memory's data in
                safe_wire(self, self.mux_sel,
                          readAG.ports.addr_out[read_addr_width + num_mux_from - 1, read_addr_width])

                comb_mux_from = self.combinational()
                # for i in range(num_mux_from):
                # TODO want to use a switch statement here, but get add_fn_ln issue
                if_mux_sel = IfStmt(self.mux_sel == 0)
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][i])
                    if_mux_sel.then_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][0]].ports.data_out))
                    if_mux_sel.else_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][1]].ports.data_out))
                comb_mux_from.add_stmt(if_mux_sel)

            # no muxing from, data_out from the one and only memory
            # goes to all to memories (valid determines whether it is
            # actually written)
            else:
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][0])
                    safe_wire(self,
                              self.mem_insts[edge["to_signal"][j]].ports.data_in,
                              # only one memory to read from
                              self.mem_insts[edge["from_signal"][0]].ports.data_out)

            # create output addressor
            writeAG = AddrGen(iterator_support=edge["dim"],
                              config_width=self.default_config_width)
            # step, mux_sel, restart may need delayed signals (assigned later)
            self.add_child(f"{edge_name}_write_addr_gen",
                           writeAG,
                           clk=self.gclk,
                           rst_n=self.rst_n)

            # set write addr for to memories
            if self.memories[edge["to_signal"][0]]["num_read_write_ports"] == 0:
                for i in range(len(edge["to_signal"])):
                    safe_wire(self, self.mem_insts[edge["to_signal"][i]].ports.write_addr[0], writeAG.ports.addr_out)
            else:
                for i in range(len(edge["to_signal"])):
                    self.mem_read_write_addrs[edge["to_signal"][i]] = {"write": self.valid, "write_addr": writeAG.ports.addr_out}

            # calculate necessary delay between from_signal to to_signal
            # TODO this may need to be more sophisticated and based on II as well
            # TODO just need to add for loops for all the ports
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                self.delay = self.memories[edge["from_signal"][0]]["read_info"][0]["latency"]
            else:
                self.delay = self.memories[edge["from_signal"][0]]["read_write_info"][0]["latency"]

            if self.delay > 0:
                # signals that need to be delayed due to edge latency
                self.delayed_writes = self.var(f"{edge_name}_delayed_writes",
                                               width=self.delay)
                self.delayed_mux_sels = self.var(f"{edge_name}_delayed_mux_sels",
                                                 width=self.forloop.ports.mux_sel_out.width,
                                                 size=self.delay,
                                                 explicit_array=True,
                                                 packed=True)
                self.delayed_restarts = self.var(f"{edge_name}_delayed_restarts",
                                                 width=self.delay)

                # delay in valid between read from memory and write to next memory
                @always_ff((posedge, self.gclk), (negedge, "rst_n"))
                def get_delayed_write(self):
                    if ~self.rst_n:
                        self.delayed_writes = 0
                        self.delayed_mux_sels = 0
                        self.delayed_restarts = 0
                    else:
                        for i in range(self.delay - 1):
                            self.delayed_writes[i + 1] = self.delayed_writes[i]
                            self.delayed_mux_sels[i + 1] = self.delayed_mux_sels[i]
                            self.delayed_restarts[i + 1] = self.delayed_restarts[i]
                        self.delayed_writes[0] = self.valid
                        self.delayed_mux_sels[0] = self.forloop.ports.mux_sel_out
                        self.delayed_restarts[0] = self.forloop.ports.restart

                self.add_always(get_delayed_write)

            # if we have a mux for the destination memories,
            # choose which mux to write to
            if num_mux_to > 1:
                num_mux_bits = clog2(num_mux_to)
                self.mux_sel_to = self.var(f"{edge_name}_mux_sel_to",
                                           width=num_mux_bits)

                write_addr_width = max(1, clog2(self.memories[edge["to_signal"][0]]["capacity"]))
                # decide which destination memory gets written to
                safe_wire(self, self.mux_sel_to,
                          writeAG.ports.addr_out[write_addr_width + num_mux_to - 1, write_addr_width])

                # wire the write (or if needed, delayed write) signal to the selected destination memory
                # and set write enable low for all other destination memories
                comb_mux_to = self.combinational()
                for i in range(num_mux_to):
                    if_mux_sel_to = IfStmt(self.mux_sel_to == i)
                    if self.delay == 0:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.valid))
                    else:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.delayed_writes[self.delay - 1]))

                    if_mux_sel_to.else_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.low))
                    comb_mux_to.add_stmt(if_mux_sel_to)

            # no muxing to, just write to the one destination memory
            else:
                if self.delay == 0:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.valid)
                else:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.delayed_writes[self.delay - 1])

            # assign delayed signals for write addressor if needed
            if self.delay == 0:
                self.wire(writeAG.ports.step, self.valid)
                self.wire(writeAG.ports.mux_sel, self.forloop.ports.mux_sel_out)
                self.wire(writeAG.ports.restart, self.forloop.ports.restart)
            else:
                self.wire(writeAG.ports.step, self.delayed_writes[self.delay - 1])
                self.wire(writeAG.ports.mux_sel, self.delayed_mux_sels[self.delay - 1])
                self.wire(writeAG.ports.restart, self.delayed_restarts[self.delay - 1])

            # create accessor for edge
            newSG = SchedGen(iterator_support=edge["dim"],
                             config_width=self.cycle_count_width)  # self.default_config_width)

            self.add_child(edge_name + "_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # for read write memories, choose either read or write address based on whether
        # we are writing to the memory (whether write enable is high)
        read_write_addr_comb = self.combinational()
        for mem_name in self.memories:
            if mem_name in self.mem_read_write_addrs:
                mem_info = self.mem_read_write_addrs[mem_name]
                if_write = IfStmt(mem_info["write"] == 1)
                addr_width = self.mem_insts[mem_name].ports.read_write_addr[0].width
                if_write.then_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["write_addr"][addr_width - 1, 0]))
                if_write.else_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["read_addr"][addr_width - 1, 0]))
                read_write_addr_comb.add_stmt(if_write)

        # clock enable and flush passes
        kts.passes.auto_insert_clock_enable(self.internal_generator)
        clk_en_port = self.internal_generator.get_port("clk_en")
        clk_en_port.add_attribute(FormalAttr(clk_en_port.name, FormalSignalConstraint.SET1))

        self.add_attribute("sync-reset=flush")
        kts.passes.auto_insert_sync_reset(self.internal_generator)
        flush_port = self.internal_generator.get_port("flush")

        # bring config registers up to top level
        lift_config_reg(self.internal_generator)
Exemplo n.º 8
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):
        """Construct the ``strg_ub_sram_tb_shared`` generator.

        For every interconnect output port one ForLoop and one SchedGen are
        instantiated.  The loop's ``mux_sel_out`` / ``restart`` ports are
        exported through ``loops_sram2tb_mux_sel`` / ``loops_sram2tb_restart``
        and the scheduler's ``valid_output`` drives one bit of ``t_read``,
        which both steps the loop and is mirrored on ``t_read_out``.

        Several parameters (``banks``, the ``output_*_iterator_support``
        values, ``mem_input_ports``, ``mem_output_ports``, ``read_delay`` and
        ``rw_same_cycle``) are accepted but not referenced in this
        constructor.
        """
        super().__init__("strg_ub_sram_tb_shared")

        # ------------------------------------------------------------------
        # Constructor parameters kept on the instance
        # ------------------------------------------------------------------
        self.data_width = data_width
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.fetch_width = mem_width // data_width
        self.config_width = config_width
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        # Fixed sizing constants used by this generator
        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        # ------------------------------------------------------------------
        # Ports
        # ------------------------------------------------------------------
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        # One mux_sel / restart entry per output port
        sel_width = max(clog2(self.default_iterator_support), 1)
        self._loops_sram2tb_mux_sel = self.output(
            "loops_sram2tb_mux_sel",
            width=sel_width,
            size=self.interconnect_output_ports,
            packed=True,
            explicit_array=True)

        self._loops_sram2tb_restart = self.output(
            "loops_sram2tb_restart",
            width=1,
            size=self.interconnect_output_ports,
            packed=True,
            explicit_array=True)

        # Internal read strobes, mirrored on an output port
        self._t_read_out = self.output("t_read_out",
                                       width=self.interconnect_output_ports)
        self._t_read = self.var("t_read",
                                width=self.interconnect_output_ports)
        self.wire(self._t_read_out, self._t_read)

        # ------------------------------------------------------------------
        # Per-output-port loop + schedule
        # ------------------------------------------------------------------
        for port_idx in range(self.interconnect_output_ports):
            read_strobe = self._t_read[port_idx]

            # iteration domain shared by sram reads and tb writes
            read_loop = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            self.add_child(f"loops_buf2out_autovec_read_{port_idx}",
                           read_loop,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=read_strobe)

            # export the loop state for this port
            safe_wire(gen=self,
                      w_to=self._loops_sram2tb_mux_sel[port_idx],
                      w_from=read_loop.ports.mux_sel_out)
            self.wire(self._loops_sram2tb_restart[port_idx],
                      read_loop.ports.restart)

            # sram read schedule; the tb write schedule is this delayed by
            # one clock cycle (done in tb_only)
            read_sched = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=16)
            self.add_child(f"output_sched_gen_{port_idx}",
                           read_sched,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           cycle_count=self._cycle_count,
                           mux_sel=read_loop.ports.mux_sel_out,
                           finished=read_loop.ports.restart,
                           valid_output=read_strobe)
Exemplo n.º 9
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):
        """Build the ``strg_ub_tb_only`` generator.

        Declares the clock/reset, ``cycle_count``, ``sram_read_data``,
        ``t_read`` and the sram2tb loop ``mux_sel`` / ``restart`` inputs, plus
        the ``accessor_output`` and ``data_out`` outputs.  For each of the
        ``interconnect_output_ports`` ports it then:

        * registers ``t_read``, ``mux_sel`` and ``restart`` for one clock
          cycle;
        * instantiates a write AddrGen stepped by the delayed read strobe and
          stores ``sram_read_data`` into the ``tb`` array on that strobe;
        * instantiates a ForLoop / AddrGen / SchedGen trio for reads out of
          ``tb`` and drives ``data_out`` from the addressed ``tb`` entry.

        ``banks``, ``output_addr_iterator_support``,
        ``output_sched_iterator_support``, ``mem_input_ports``,
        ``mem_output_ports``, ``read_delay`` and ``rw_same_cycle`` are
        accepted but not referenced in this constructor.
        """

        super().__init__("strg_ub_tb_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        # number of data words in one memory word
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        # hard-coded sizing constants (not derived from the arguments)
        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        # data from SRAM
        self._sram_read_data = self.input("sram_read_data",
                                          self.data_width,
                                          size=self.fetch_width,
                                          packed=True,
                                          explicit_array=True)
        # read enable from SRAM
        self._t_read = self.input("t_read", self.interconnect_output_ports)

        # sram to tb for loop
        self._loops_sram2tb_mux_sel = self.input(
            "loops_sram2tb_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        self._loops_sram2tb_restart = self.input(
            "loops_sram2tb_restart",
            width=1,
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        # one valid bit and one data word per output port
        self._valid_out = self.output("accessor_output",
                                      self.interconnect_output_ports)
        self._data_out = self.output("data_out",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        ##################################################################################
        # TB RELEVANT SIGNALS
        ##################################################################################
        # storage array indexed as [output port][tb_height][fetch_width]
        self._tb = self.var("tb",
                            width=self.data_width,
                            size=(self.interconnect_output_ports,
                                  self.tb_height, self.fetch_width),
                            packed=True,
                            explicit_array=True)

        # NOTE(review): the constant 2 in the address widths below presumably
        # stands for clog2(fetch_width) with the default sizes - confirm.
        self._tb_write_addr = self.var("tb_write_addr",
                                       2 + max(1, clog2(self.tb_height)),
                                       size=self.interconnect_output_ports,
                                       packed=True,
                                       explicit_array=True)

        self._tb_read_addr = self.var("tb_read_addr",
                                      2 + max(1, clog2(self.tb_height)),
                                      size=self.interconnect_output_ports,
                                      packed=True,
                                      explicit_array=True)

        # write enable to tb, delayed 1 cycle from SRAM reads
        self._t_read_d1 = self.var("t_read_d1", self.interconnect_output_ports)
        # read enable for reads from tb
        self._tb_read = self.var("tb_read", self.interconnect_output_ports)

        # Break out valids...
        self.wire(self._valid_out, self._tb_read)

        # delayed input mux_sel and restart signals from sram read/tb write
        # for loop and scheduling
        # NOTE(review): this width is clog2(...) without the max(..., 1) guard
        # used for the loops_sram2tb_mux_sel input; the two agree for the
        # hard-coded default_iterator_support of 6.
        self._mux_sel_d1 = self.var("mux_sel_d1",
                                    kts.clog2(self.default_iterator_support),
                                    size=self.interconnect_output_ports,
                                    packed=True,
                                    explicit_array=True)

        self._restart_d1 = self.var("restart_d1",
                                    width=1,
                                    size=self.interconnect_output_ports,
                                    explicit_array=True,
                                    packed=True)

        for i in range(self.interconnect_output_ports):
            # signals delayed by 1 cycle from SRAM
            # (cleared to 0 while rst_n is low; one block is added per port i)
            @always_ff((posedge, "clk"), (negedge, "rst_n"))
            def delay_read():
                if ~self._rst_n:
                    self._t_read_d1[i] = 0
                    self._mux_sel_d1[i] = 0
                    self._restart_d1[i] = 0
                else:
                    self._t_read_d1[i] = self._t_read[i]
                    self._mux_sel_d1[i] = self._loops_sram2tb_mux_sel[i]
                    self._restart_d1[i] = self._loops_sram2tb_restart[i]

            self.add_code(delay_read)

        ##################################################################################
        # TB PATHS
        ##################################################################################
        for i in range(self.interconnect_output_ports):

            # sizing for the tb address generators / read loop; these are
            # loop-invariant and re-assigned on every iteration
            self.tb_iter_support = 6
            self.tb_addr_width = 4
            self.tb_range_width = 16

            # WRITE TO TB: address generator driven by the delayed sram2tb
            # loop signals
            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.tb_addr_width)

            self.add_child(f"tb_write_addr_gen_{i}",
                           _AG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._t_read_d1[i],
                           mux_sel=self._mux_sel_d1[i],
                           restart=self._restart_d1[i])
            safe_wire(gen=self,
                      w_to=self._tb_write_addr[i],
                      w_from=_AG.ports.addr_out)

            # on a delayed read strobe, store the whole sram_read_data array
            # into the tb entry selected by element [0] of this port's write
            # address (no reset on this block)
            @always_ff((posedge, "clk"))
            def tb_ctrl():
                if self._t_read_d1[i]:
                    self._tb[i][self._tb_write_addr[i][0]] = \
                        self._sram_read_data

            self.add_code(tb_ctrl)

            # READ FROM TB

            # iteration domain for reads out of tb, stepped by tb_read[i]
            fl_ctr_tb_rd = ForLoop(iterator_support=self.tb_iter_support,
                                   config_width=self.tb_range_width)

            self.add_child(f"loops_buf2out_read_{i}",
                           fl_ctr_tb_rd,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._tb_read[i])

            # read address generator following the read loop
            _AG = AddrGen(iterator_support=self.tb_iter_support,
                          config_width=self.tb_addr_width)
            self.add_child(
                f"tb_read_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._tb_read[i],
                # addr_out=self._tb_read_addr[i])
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                restart=fl_ctr_tb_rd.ports.restart)
            safe_wire(gen=self,
                      w_to=self._tb_read_addr[i],
                      w_from=_AG.ports.addr_out)

            # read schedule: its valid_output is connected to tb_read[i],
            # which steps the loop and address generator above
            self.add_child(
                f"tb_read_sched_gen_{i}",
                SchedGen(
                    iterator_support=self.tb_iter_support,
                    # config_width=self.tb_addr_width),
                    config_width=16),
                clk=self._clk,
                rst_n=self._rst_n,
                cycle_count=self._cycle_count,
                mux_sel=fl_ctr_tb_rd.ports.mux_sel_out,
                finished=fl_ctr_tb_rd.ports.restart,
                valid_output=self._tb_read[i])

            # data_out[i] is the tb word addressed by tb_read_addr[i]: the
            # slice above the low clog2(fetch_width) bits indexes the
            # tb_height dimension, the low clog2(fetch_width) bits index the
            # word within the entry
            @always_comb
            def tb_to_out():
                self._data_out[i] = self._tb[i][self._tb_read_addr[i][
                    clog2(self.tb_height) + clog2(self.fetch_width) - 1,
                    clog2(self.fetch_width)]][self._tb_read_addr[i][
                        clog2(self.fetch_width) - 1, 0]]

            self.add_code(tb_to_out)
Exemplo n.º 10
0
    def __init__(self,
                 data_width=16,  # CGRA Params
                 mem_width=64,
                 mem_depth=512,
                 banks=1,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6,
                 config_width=16,
                 #  output_config_width=16,
                 interconnect_input_ports=2,  # Connection to int
                 interconnect_output_ports=2,
                 mem_input_ports=1,
                 mem_output_ports=1,
                 read_delay=1,  # Cycle delay in read (SRAM vs Register File)
                 rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
                 agg_height=4,
                 tb_height=2):

        super().__init__("strg_ub_agg_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.tb_height = tb_height
        self.mem_width = mem_width
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._cycle_count = self.input("cycle_count", 16)

        self._data_in = self.input("data_in", self.data_width,
                                   size=self.interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

        self._agg_read = self.input("agg_read", self.interconnect_input_ports)

        self._floop_mux_sel = self.input("floop_mux_sel",
                                         width=max(clog2(self.default_iterator_support), 1),
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        self._floop_restart = self.input("floop_restart",
                                         width=1,
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        self._agg_data_out = self.output(f"agg_data_out", self.data_width,
                                         size=(self.interconnect_input_ports,
                                               self.fetch_width),
                                         packed=True,
                                         explicit_array=True)
        self._agg_data_out.add_attribute(FormalAttr(self._agg_data_out.name, FormalSignalConstraint.SEQUENCE, "sram"))

        ##################################################################################
        # AGG RELEVANT SIGNALS
        ##################################################################################
        # Create an input to agg write scheduler + addressor for each input
        # Also need an addressor for the mux in addition to the read addr
        self._agg = self.var(f"agg",
                             width=self.data_width,
                             size=(self.interconnect_input_ports,
                                   self.agg_height,
                                   self.fetch_width),
                             packed=True,
                             explicit_array=True)

        self._agg_write = self.var("agg_write", self.interconnect_input_ports)
        # Make this based on the size
        self._agg_write_addr = self.var("agg_write_addr", 2 + clog2(self.agg_height),
                                        size=self.interconnect_input_ports,
                                        packed=True,
                                        explicit_array=True)
        self._agg_read_addr = self.var("agg_read_addr", max(1, clog2(self.agg_height)),
                                       size=self.interconnect_input_ports,
                                       packed=True,
                                       explicit_array=True)
        self._agg_read_addr_gen_out = self.var("agg_read_addr_gen_out", self.agg_rd_addr_gen_width,
                                               size=self.interconnect_input_ports,
                                               packed=True,
                                               explicit_array=True)

        ##################################################################################
        # AGG PATHS
        ##################################################################################
        for i in range(self.interconnect_input_ports):

            self.agg_iter_support = 6
            self.agg_addr_width = 4
            self.agg_range_width = 16

            forloop_ctr = ForLoop(iterator_support=self.agg_iter_support,
                                  # config_width=self.default_config_width)
                                  config_width=self.agg_range_width)
            loop_itr = forloop_ctr.get_iter()
            loop_wth = forloop_ctr.get_cfg_width()

            self.add_child(f"loops_in2buf_{i}",
                           forloop_ctr,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i])

            newAG = AddrGen(iterator_support=self.agg_iter_support,
                            config_width=self.agg_addr_width)
            self.add_child(f"agg_write_addr_gen_{i}",
                           newAG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_write[i],
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           restart=forloop_ctr.ports.restart)
            safe_wire(gen=self, w_to=self._agg_write_addr[i], w_from=newAG.ports.addr_out)

            newSG = SchedGen(iterator_support=self.agg_iter_support,
                             # config_width=self.agg_addr_width)
                             config_width=16)

            self.add_child(f"agg_write_sched_gen_{i}",
                           newSG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           mux_sel=forloop_ctr.ports.mux_sel_out,
                           finished=forloop_ctr.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._agg_write[i])

            @always_ff((posedge, "clk"))
            def agg_ctrl():
                if self._agg_write[i]:
                    if self.agg_height == 1:
                        self._agg[i][0][self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]
                    else:
                        self._agg[i][self._agg_write_addr[i]
                                     [self._agg_write_addr[0].width - 1, clog2(self.fetch_width)]]\
                                    [self._agg_write_addr[i][clog2(self.fetch_width) - 1, 0]] = self._data_in[i]

            self.add_code(agg_ctrl)

            newAG = AddrGen(iterator_support=self.default_iterator_support,
                            config_width=self.agg_addr_width)

            self.add_child(f"agg_read_addr_gen_{i}",
                           newAG,
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=self._agg_read[i],
                           #  (self._input_port_sel_addr == const(i, self._input_port_sel_addr.width))),
                           # mux_sel=self._floop_mux_sel[i],
                           restart=self._floop_restart[i])
            safe_wire(gen=self, w_to=newAG.ports.mux_sel, w_from=self._floop_mux_sel[i])
            safe_wire(gen=self, w_to=self._agg_read_addr_gen_out[i], w_from=newAG.ports.addr_out)
            self.wire(self._agg_read_addr[i], self._agg_read_addr_gen_out[i][self._agg_read_addr.width - 1, 0])

            # Now pick out the data from the agg...
            @always_comb
            def get_agg_data():
                self._agg_data_out[i] = self._agg[i][self._agg_read_addr[i]]
            self.add_code(get_agg_data)
Exemplo n.º 11
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=1,
            input_addr_iterator_support=6,
            output_addr_iterator_support=6,
            input_sched_iterator_support=6,
            output_sched_iterator_support=6,
            config_width=16,
            #  output_config_width=16,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            tb_height=2):
        """Build the "strg_ub_sram_only" generator.

        The constructor declares the module IO, instantiates one ``AddrGen``
        child per interconnect input port (stepped by ``agg_read[i]``) and one
        per interconnect output port (stepped by ``t_read[i]``), and wires the
        write-enable, chip-enable, address and data outputs that face the SRAM.

        Args:
            data_width: Bit width of one data word.
            mem_width: Bit width of one memory word; ``mem_width // data_width``
                is stored as ``fetch_width``.
            mem_depth: Memory depth; ``clog2(mem_depth)`` sizes the SRAM
                address signals.
            config_width: Bit width of the internal ``s_write_addr`` /
                ``s_read_addr`` arrays.
            interconnect_input_ports: Number of input-side ports (size of
                ``agg_read``, ``floop_*`` and the first dimension of
                ``agg_data_out``).
            interconnect_output_ports: Number of output-side ports (size of
                ``t_read`` and ``loops_sram2tb_*``).
            input_addr_iterator_support: Stored on the instance; not read
                again in this constructor.
            input_sched_iterator_support: Stored on the instance; not read
                again in this constructor.
            agg_height: Stored on the instance; not read again in this
                constructor.
            tb_height: Stored on the instance; not read again in this
                constructor.
            banks: Accepted but not read in this constructor.
            output_addr_iterator_support: Accepted but not read in this
                constructor.
            output_sched_iterator_support: Accepted but not read in this
                constructor.
            mem_input_ports: Accepted but not read in this constructor.
            mem_output_ports: Accepted but not read in this constructor.
            read_delay: Accepted but not read in this constructor.
            rw_same_cycle: Accepted but not read in this constructor.
        """

        super().__init__("strg_ub_sram_only")

        ##################################################################################
        # Capture constructor parameter...
        ##################################################################################
        # Number of data_width-wide words that fit in one mem_width-wide word.
        self.fetch_width = mem_width // data_width
        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.agg_height = agg_height
        self.mem_width = mem_width
        self.tb_height = tb_height
        self.mem_depth = mem_depth
        self.config_width = config_width
        self.data_width = data_width
        self.input_addr_iterator_support = input_addr_iterator_support
        self.input_sched_iterator_support = input_sched_iterator_support

        # Fixed values, not derived from the constructor arguments. Of these,
        # only default_iterator_support is read later in this constructor.
        self.default_iterator_support = 6
        self.default_config_width = 16
        self.sram_iterator_support = 6
        self.agg_rd_addr_gen_width = 8

        ##################################################################################
        # IO
        ##################################################################################
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        # Declared as a port but not connected to anything in this constructor.
        self._cycle_count = self.input("cycle_count", 16)

        # agg to sram for loop
        # One mux_sel / restart entry per input port; they feed the
        # input_addr_gen_{i} children created below.
        self._floop_mux_sel = self.input(
            "floop_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_input_ports,
            explicit_array=True,
            packed=True)

        self._floop_restart = self.input("floop_restart",
                                         width=1,
                                         size=self.interconnect_input_ports,
                                         explicit_array=True,
                                         packed=True)

        # sram to tb for loop
        # One mux_sel / restart entry per output port; they feed the
        # output_addr_gen_{i} children created below.
        self._loops_sram2tb_mux_sel = self.input(
            "loops_sram2tb_mux_sel",
            width=max(clog2(self.default_iterator_support), 1),
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        self._loops_sram2tb_restart = self.input(
            "loops_sram2tb_restart",
            width=1,
            size=self.interconnect_output_ports,
            explicit_array=True,
            packed=True)

        # One bit per port. Bit i steps the matching address generator, and the
        # OR-reductions of these vectors become the write / read strobes below.
        self._agg_read = self.input("agg_read", self.interconnect_input_ports)
        self._t_read = self.input("t_read", self.interconnect_output_ports)

        # data from aggs, get decoded for sram_write_data which is wired to data_to_sram
        self._agg_data_out = self.input(f"agg_data_out",
                                        self.data_width,
                                        size=(self.interconnect_input_ports,
                                              self.fetch_width),
                                        packed=True,
                                        explicit_array=True)
        self._agg_data_out.add_attribute(
            FormalAttr(self._agg_data_out.name,
                       FormalSignalConstraint.SEQUENCE, "agg"))
        # sram attribute for data_in, comes from cut gen of agg_only for agg_data_out_top

        # Outputs facing the SRAM: write enable, chip enable, address and one
        # fetch_width-word row of write data.
        self._wen_to_sram = self.output("wen_to_sram", 1, packed=True)
        self._cen_to_sram = self.output("cen_to_sram", 1, packed=True)
        self._addr_to_sram = self.output("addr_to_sram",
                                         clog2(self.mem_depth),
                                         packed=True)
        self._data_to_sram = self.output("data_to_sram",
                                         self.data_width,
                                         size=self.fetch_width,
                                         packed=True)

        ##################################################################################
        # INTERNAL SIGNALS
        ##################################################################################
        # Per-port addresses produced by the address generators. These are
        # config_width bits wide, whereas the AddrGen children are constructed
        # with config_width=self.mem_addr_width; the connection goes through
        # safe_wire (presumably to reconcile the widths — confirm against
        # safe_wire's definition).
        self._s_write_addr = self.var("s_write_addr",
                                      self.config_width,
                                      size=self.interconnect_input_ports,
                                      packed=True,
                                      explicit_array=True)

        self._s_read_addr = self.var("s_read_addr",
                                     self.config_width,
                                     size=self.interconnect_output_ports,
                                     packed=True,
                                     explicit_array=True)

        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        # Wired to addr_to_sram below; nothing in this constructor drives it
        # directly (see the set_sram_addr note at the end).
        self._addr = self.var("addr", clog2(self.mem_depth))

        self._sram_write_data = self.var("sram_write_data",
                                         data_width,
                                         size=self.fetch_width,
                                         packed=True)

        self.mem_addr_width = clog2(self.mem_depth)

        # Write side: one address generator per interconnect input port,
        # stepped by that port's agg_read bit.
        for i in range(self.interconnect_input_ports):

            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.mem_addr_width)
            self.add_child(
                f"input_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._agg_read[i],
                # mux_sel is not bound here; it is connected just below
                # through safe_wire (was: mux_sel=self._floop_mux_sel[i]).
                restart=self._floop_restart[i])
            safe_wire(gen=self,
                      w_to=_AG.ports.mux_sel,
                      w_from=self._floop_mux_sel[i])
            safe_wire(gen=self,
                      w_to=self._s_write_addr[i],
                      w_from=_AG.ports.addr_out)

        ##################################################################################
        # TB PATHS
        ##################################################################################
        # Read side: one address generator per interconnect output port,
        # stepped by that port's t_read bit.
        for i in range(self.interconnect_output_ports):

            _AG = AddrGen(iterator_support=self.default_iterator_support,
                          config_width=self.mem_addr_width)
            self.add_child(
                f"output_addr_gen_{i}",
                _AG,
                clk=self._clk,
                rst_n=self._rst_n,
                step=self._t_read[i],
                # mux_sel is not bound here; it is connected just below
                # through safe_wire (was: mux_sel=self._loops_sram2tb_mux_sel[i]).
                restart=self._loops_sram2tb_restart[i])
            safe_wire(gen=self,
                      w_to=_AG.ports.mux_sel,
                      w_from=self._loops_sram2tb_mux_sel[i])
            safe_wire(gen=self,
                      w_to=self._s_read_addr[i],
                      w_from=_AG.ports.addr_out)

        ##################################################################################
        # WIRE TO SRAM INTERFACE
        ##################################################################################
        # Now select the write address as a decode of the underlying enables
        self.wire(self._addr_to_sram, self._addr)
        self.wire(self._data_to_sram, self._sram_write_data)
        self.wire(self._wen_to_sram, self._write)
        # Chip enable is asserted for either a write or a read.
        self.wire(self._cen_to_sram, self._write | self._read)

        # A write (read) is requested when any agg_read (t_read) bit is set.
        self.wire(self._write, self._agg_read.r_or())
        self.wire(self._read, self._t_read.r_or())

        # decode() is defined outside this block; presumably it selects the
        # entry of the third argument whose enable bit in the second argument
        # is set — confirm against its definition.
        self.wire(self._sram_write_data,
                  decode(self, self._agg_read, self._agg_data_out))

        self._write_addr = decode(self, self._agg_read, self._s_write_addr)
        self._read_addr = decode(self, self._t_read, self._s_read_addr)
        # set_sram_addr is a method defined outside this constructor;
        # NOTE(review): presumably it drives self._addr from _write_addr /
        # _read_addr — confirm against its body.
        self.add_code(self.set_sram_addr)