Пример #1
0
def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Check point-wise multiplication through a routed 2x2 CGRA.

    Two 16-bit IO inputs (I0, I1) are routed to a PE programmed with
    ``asm.umult0()`` and the PE result is routed out through I2.  Every
    configuration word is written and read back, then ``batch_size`` random
    operand pairs are poked and the output is expected to be their product.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    # I0 and I1 feed the two PE operands; the PE result leaves through I2.
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing, _ = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Append the PE core bitstream, addressed to the tile p0 was placed on.
    pe_x, pe_y = placement["p0"]
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    mult_bs = pe_core.get_config_bitstream(asm.umult0())
    config_data.extend(
        (interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y), value)
        for reg_addr, value in mult_bs)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    # Write each configuration word and confirm it reads back unchanged.
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    tester.done_config()

    # Resolve the top-level port names from where the IOs were placed.
    in_name_a = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    in_name_b = interconnect.get_top_input_port_by_coord(placement["I1"], 16)
    out_name = interconnect.get_top_output_port_by_coord(placement["I2"], 16)
    port_a = circuit.interface[in_name_a]
    port_b = circuit.interface[in_name_b]
    port_out = circuit.interface[out_name]

    # Fixed seed keeps the generated stimulus reproducible.
    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(256)
        rhs = random.randrange(256)
        tester.poke(port_a, lhs)
        tester.poke(port_b, rhs)

        tester.eval()
        tester.expect(port_out, lhs * rhs)

    run_tb(tester)
def test_1x1():
    """Compile a 1x1 CGRA with no IO side and check a Verilog file appears.

    Per the original note, this configuration (``mem_ratio=(0, 1)``) is all
    PE tiles.
    """
    circuit = create_cgra(1, 1, IOSide.None_,
                          num_tracks=3,
                          mem_ratio=(0, 1)).circuit()
    with tempfile.TemporaryDirectory() as workdir:
        base_path = os.path.join(workdir, "1x1")
        magma.compile(base_path, circuit)
        verilog_path = base_path + ".v"
        assert os.path.isfile(verilog_path)
Пример #3
0
def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that reset clears the configuration and it can be rewritten.

    A 2x2 CGRA is placed and routed with a PE programmed as
    ``asm.umult0()``.  The bitstream is written and read back, the chip is
    reset and every configured address is expected to read 0, and finally
    the same bitstream is written and verified again.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3, add_pd=True, mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]

    # Addresses are assembled by hand here: register address in the bits
    # from 24 upward, tile id (x in bits 8 and up, y below) in the low bits.
    tile_id = (pe_x << 8) | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    for reg_addr, value in pe_core.get_config_bitstream(asm.umult0()):
        config_data.append(((reg_addr << 24) | tile_id, value))
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def write_and_verify():
        # Write every configuration word and expect it to read back as-is.
        for cfg_addr, cfg_value in config_data:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    write_and_verify()

    # After a reset every previously written address must read back as 0.
    tester.reset()
    for cfg_addr, _ in config_data:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # The fabric must accept the same configuration again after the reset.
    write_and_verify()

    run_tb(tester)
Пример #4
0
def _run(directory, width=2, height=2):
    """Generates and writes SV testbench in @directory.

    Builds a ``width`` x ``height`` CGRA with IO on the north side, records
    a reset plus a two-address configuration sequence (with read-back
    checks) on a tester, and hands the tester to
    ``common.generate_testbench``.
    """
    interconnect = create_cgra(width=width, height=height,
                               io_sides=IOSide.North, num_tracks=5,
                               add_pd=True)
    # Only the first two addresses of the basic sequence are exercised.
    short_sequence = common.basic_sequence(interconnect)[:2]
    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)
    common.generate_testbench(tester, directory)
Пример #5
0
def test_peak_tile_sequence(sequence, seed, run_tb):
    """
    Tile level test on a standalone 1x1 CGRA.

    * routes ``data0``, ``data1`` and ``alu_res`` of the single tile with
      ``route_one_tile`` and loads the resulting routing bitstream
    * for each item of ``sequence`` the driver writes the item's
      configuration words to the tile and drives the two operands onto the
      top-level ports chosen by the route
    * the monitor expects the item's output on the routed result port
    """
    tile_x, tile_y = 0, 0
    interconnect = create_cgra(1, 1, IOSide.None_,
                               num_tracks=3,
                               standalone=True)

    routing, port_mapping = route_one_tile(
        interconnect, tile_x, tile_y,
        ports=["data0", "data1", "alu_res"],
        seed=seed)
    route_config = compress_config_data(
        interconnect.get_route_bitstream(routing))

    circuit = interconnect.circuit()
    # Top-level port names the router assigned to each tile port.
    operand_a_port = port_mapping["data0"]
    operand_b_port = port_mapping["data1"]
    result_port = port_mapping["alu_res"]

    class TileDriver(Driver):
        def lower(self, config_data, a, b, output):
            # Translate tile-local register addresses to full addresses.
            for reg_addr, value in config_data:
                full_addr = interconnect.get_config_addr(reg_addr, 0,
                                                         tile_x, tile_y)
                self.tester.configure(full_addr, value)
            setattr(self.tester.circuit, operand_a_port, a)
            setattr(self.tester.circuit, operand_b_port, b)

    class TileMonitor(Monitor):
        def observe(self, config_data, a, b, output):
            getattr(self.tester.circuit, result_port).expect(output)

    tester = BasicSequenceTester(circuit, TileDriver(), TileMonitor(),
                                 sequence, circuit.clk, circuit.reset)
    tester.reset()
    # The routing bitstream is loaded once, before the sequence is run.
    for cfg_addr, cfg_value in route_config:
        tester.configure(cfg_addr, cfg_value)

    run_tb(tester)
Пример #6
0
def test_basic(run_tb):
    """
    Configuration sequence test on 2x2 fabric + IO tiles.

    Writes the first two addresses of the basic configuration sequence,
    checking read-back data, then runs the testbench through ``run_tb``.
    """
    size = 2
    interconnect = create_cgra(width=size, height=size,
                               io_sides=IOSide.North, num_tracks=5,
                               add_pd=True)
    # Keep the run short: only the first two addresses are configured.
    short_sequence = common.basic_sequence(interconnect)[:2]
    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)

    # Compile and execute the recorded testbench.
    run_tb(tester)
Пример #7
0
def test_basic(dw_files):
    """
    Configuration sequence test on 2x2 fabric + IO tiles.

    Writes the first two addresses of the basic configuration sequence with
    read-back checks, compiles the circuit to Verilog inside a temporary
    directory and runs the recorded testbench with verilator.
    """
    size = 2
    interconnect = create_cgra(width=size,
                               height=size,
                               io_sides=IOSide.North,
                               num_tracks=5,
                               add_pd=True)
    # Keep the run short: only the first two addresses are configured.
    short_sequence = common.basic_sequence(interconnect)[:2]
    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    common.configure(tester, short_sequence, check_read_data=True)

    with tempfile.TemporaryDirectory() as build_dir:
        common.generate_scaffolding(build_dir)
        # The circuit is compiled here, so the tester is told to skip its
        # own compile step below.
        magma.compile(f"{build_dir}/{circuit.name}",
                      circuit,
                      output="coreir-verilog",
                      coreir_libs={"float_DW"})
        tester.compile_and_run(skip_compile=True,
                               target="verilator",
                               directory=build_dir,
                               flags=["-Wno-fatal"])
Пример #8
0
def test_stall(run_tb, io_sides):
    """Check that asserting ``stall`` freezes a running application.

    The application routes a 16-bit input through a register into both a
    memory tile and a PE, and adds the memory output to the registered
    input.  The chip is configured while stalled, run for 20 iterations,
    stalled again (the output must hold its last value regardless of the
    input), and finally un-stalled to confirm the stream resumes.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # I0 -> r1 -> {m0.data_in_0, p0.data0}; m0.data_out_0 -> p0.data1;
    # p0.alu_res -> I1.  The 1-bit input i3 drives both wen_in_0 and
    # ren_in_0 of the memory, and valid_out_0 is observed on i4.
    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    # Program the PE as an adder with ra_mode set to DELAY and append its
    # bitstream using the full address of the tile p0 was placed on.
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    # (register name, value, third field) entries handed to config_mem_tile.
    # NOTE(review): the third field is always 0 here; presumably a feature
    # index -- confirm against config_mem_tile.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # write every configuration word and check that it reads back unchanged
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip so the application can run
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # top-level port names are derived from the placed IO tile coordinates
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    # i3 feeds both wen_in_0 and ren_in_0, so this enables write and read
    tester.poke(circuit.interface[wen], 1)

    # phase 1: stream 0..19 and check the sum once the pipeline has filled
    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        # NOTE(review): the literal 10 equals `depth`; presumably this was
        # meant to be `depth + 1` -- confirm before changing depth.
        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            # so the adder output is (i - 2) + (i - 1 - depth)
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # phase 2: while stalled the output must hold the value seen at i == 19
    for i in range(20):
        # poke random numbers. it shouldn't matter
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # phase 3: resume from i == 19 and expect the stream to continue
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
Пример #9
0
    def __init__(self,
                 width,
                 height,
                 add_pd,
                 interconnect_only: bool = False,
                 use_sram_stub: bool = True):
        """Build the top-level generator around a CGRA interconnect.

        Unless ``interconnect_only`` is set, a global controller and a
        global buffer are instantiated, top-level ports are added and the
        three components are wired together.  Otherwise only the
        interconnect is created and its ports are lifted to the top level.

        Args:
            width: array width passed to ``create_cgra``; also determines
                the number of parallel configuration channels and IO
                channels (``ceil(width / 4)`` each).
            height: array height passed to ``create_cgra``.
            add_pd: forwarded unchanged to ``create_cgra``.
            interconnect_only: when True, skip the global controller and
                global buffer and use ``GlobalSignalWiring.Meso``.
            use_sram_stub: forwarded unchanged to ``create_cgra``.
        """
        super().__init__()

        # configuration parameters
        config_addr_width = 32
        config_data_width = 32
        axi_addr_width = 12
        tile_id_width = 16
        config_addr_reg_width = 8
        num_tracks = 5

        # size
        self.width = width
        self.height = height

        # only north side has IO
        io_side = IOSide.North

        # global buffer parameters
        num_banks = 32
        bank_addr_width = 17
        bank_data_width = 64
        glb_addr_width = 32

        # parallel configuration parameter
        num_parallel_cfg = math.ceil(width / 4)

        # number of input/output channels parameter
        num_io = math.ceil(width / 4)

        if not interconnect_only:
            # full chip: controller + buffer, parallel-meso global wiring
            wiring = GlobalSignalWiring.ParallelMeso
            self.global_controller = GlobalController(config_addr_width,
                                                      config_data_width,
                                                      axi_addr_width)

            self.global_buffer = GlobalBuffer(num_banks=num_banks,
                                              num_io=num_io,
                                              num_cfg=num_parallel_cfg,
                                              bank_addr_width=bank_addr_width,
                                              glb_addr_width=glb_addr_width,
                                              cfg_addr_width=config_addr_width,
                                              cfg_data_width=config_data_width,
                                              axi_addr_width=axi_addr_width)
        else:
            # interconnect alone uses the plain meso global wiring
            wiring = GlobalSignalWiring.Meso

        interconnect = create_cgra(width,
                                   height,
                                   io_side,
                                   reg_addr_width=config_addr_reg_width,
                                   config_data_width=config_data_width,
                                   tile_id_width=tile_id_width,
                                   num_tracks=num_tracks,
                                   add_pd=add_pd,
                                   use_sram_stub=use_sram_stub,
                                   global_signal_wiring=wiring,
                                   num_parallel_config=num_parallel_cfg,
                                   mem_ratio=(1, 4))

        self.interconnect = interconnect

        if not interconnect_only:
            # top-level ports of the full chip
            self.add_ports(
                jtag=JTAGType,
                clk_in=magma.In(magma.Clock),
                reset_in=magma.In(magma.AsyncReset),
                soc_data=SoCDataType(glb_addr_width, bank_data_width),
                axi4_ctrl=AXI4SlaveType(axi_addr_width, config_data_width),
                cgra_running_clk_out=magma.Out(magma.Clock),
            )

            # top <-> global controller ports connection
            self.wire(self.ports.clk_in, self.global_controller.ports.clk_in)
            self.wire(self.ports.reset_in,
                      self.global_controller.ports.reset_in)
            self.wire(self.ports.jtag, self.global_controller.ports.jtag)
            self.wire(self.ports.axi4_ctrl,
                      self.global_controller.ports.axi4_ctrl)
            self.wire(self.ports.cgra_running_clk_out,
                      self.global_controller.ports.clk_out)

            # top <-> global buffer ports connection
            self.wire(self.ports.soc_data, self.global_buffer.ports.soc_data)
            # remaining connections between controller, buffer and
            # interconnect are delegated to helpers defined elsewhere
            glc_interconnect_wiring(self)
            glb_glc_wiring(self)
            glb_interconnect_wiring(self, width, num_parallel_cfg)
        else:
            # lift all the interconnect ports up
            self._lift_interconnect_ports(config_data_width)

        # mapper state starts out uninitialized
        # (the attribute name is spelled "initalized" in this code base)
        self.mapper_initalized = False
        self.__rewrite_rules = None
Пример #10
0
    def __init__(self,
                 width,
                 height,
                 add_pd,
                 interconnect_only: bool = False,
                 use_sram_stub: bool = True,
                 standalone: bool = False):
        """Build the top-level generator around a CGRA interconnect.

        Unless ``interconnect_only`` is set, a global controller and a
        global buffer are instantiated, top-level ports are added and the
        three components are wired together.  Otherwise only the
        interconnect is created; its interface ports plus clk, reset,
        config, stall and read_config_data are exposed at the top level.

        Args:
            width: array width passed to ``create_cgra``.  Must be even
                when the global buffer is built (one buffer tile per two
                columns).
            height: array height passed to ``create_cgra``.
            add_pd: forwarded unchanged to ``create_cgra``.
            interconnect_only: when True, skip the global controller and
                global buffer and use ``GlobalSignalWiring.Meso``.
            use_sram_stub: forwarded unchanged to ``create_cgra``.
            standalone: forwarded to ``create_cgra`` and selects
                ``IOSide.None_``; requires ``interconnect_only``.

        Raises:
            AssertionError: if ``standalone`` is set without
                ``interconnect_only``, if ``width`` is odd in the full-chip
                build, or if the hard-coded width constants disagree.
        """
        super().__init__()

        # Check consistency of @standalone and @interconnect_only parameters. If
        # @standalone is True, then interconnect_only must also be True.
        if standalone:
            assert interconnect_only

        # configuration parameters
        config_addr_width = 32
        config_data_width = 32
        self.config_addr_width = config_addr_width
        self.config_data_width = config_data_width
        axi_addr_width = 13
        glb_axi_addr_width = 12
        axi_data_width = 32
        # axi_data_width must be same as cgra config_data_width
        assert axi_data_width == config_data_width

        tile_id_width = 16
        config_addr_reg_width = 8
        num_tracks = 5

        # size
        self.width = width
        self.height = height

        # standalone builds use IOSide.None_; otherwise only the north side
        # has IO
        if standalone:
            io_side = IOSide.None_
        else:
            io_side = IOSide.North

        if not interconnect_only:
            # global buffer parameters
            # width must be even number
            assert (self.width % 2) == 0
            num_glb_tiles = self.width // 2

            bank_addr_width = 17
            bank_data_width = 64
            banks_per_tile = 2

            # global buffer address = bank address bits + bank-select bits
            # + tile-select bits
            glb_addr_width = (bank_addr_width +
                              magma.bitutils.clog2(banks_per_tile) +
                              magma.bitutils.clog2(num_glb_tiles))

            # bank_data_width must be the size of bitstream
            assert bank_data_width == config_addr_width + config_data_width

            wiring = GlobalSignalWiring.ParallelMeso
            self.global_controller = GlobalController(
                addr_width=config_addr_width,
                data_width=config_data_width,
                axi_addr_width=axi_addr_width,
                axi_data_width=axi_data_width,
                num_glb_tiles=num_glb_tiles,
                glb_addr_width=glb_addr_width,
                block_axi_addr_width=glb_axi_addr_width)

            self.global_buffer = GlobalBuffer(
                num_glb_tiles=num_glb_tiles,
                num_cgra_cols=width,
                bank_addr_width=bank_addr_width,
                bank_data_width=bank_data_width,
                cfg_addr_width=config_addr_width,
                cfg_data_width=config_data_width,
                axi_addr_width=glb_axi_addr_width,
                axi_data_width=axi_data_width)
        else:
            # interconnect alone uses the plain meso global wiring
            wiring = GlobalSignalWiring.Meso

        interconnect = create_cgra(width,
                                   height,
                                   io_side,
                                   reg_addr_width=config_addr_reg_width,
                                   config_data_width=config_data_width,
                                   tile_id_width=tile_id_width,
                                   num_tracks=num_tracks,
                                   add_pd=add_pd,
                                   use_sram_stub=use_sram_stub,
                                   global_signal_wiring=wiring,
                                   mem_ratio=(1, 4),
                                   standalone=standalone)

        self.interconnect = interconnect

        if not interconnect_only:
            # top-level ports of the full chip
            self.add_ports(
                jtag=JTAGType,
                clk_in=magma.In(magma.Clock),
                reset_in=magma.In(magma.AsyncReset),
                proc_packet=ProcPacketIfc(glb_addr_width,
                                          bank_data_width).slave,
                axi4_slave=AXI4LiteIfc(axi_addr_width, axi_data_width).slave,
                interrupt=magma.Out(magma.Bit),
                cgra_running_clk_out=magma.Out(magma.Clock),
            )

            # top <-> global controller ports connection
            self.wire(self.ports.clk_in, self.global_controller.ports.clk_in)
            self.wire(self.ports.reset_in,
                      self.global_controller.ports.reset_in)
            self.wire(self.ports.jtag, self.global_controller.ports.jtag)
            self.wire(self.ports.axi4_slave,
                      self.global_controller.ports.axi4_slave)
            self.wire(self.ports.interrupt,
                      self.global_controller.ports.interrupt)
            self.wire(self.ports.cgra_running_clk_out,
                      self.global_controller.ports.clk_out)

            # top <-> global buffer ports connection
            self.wire(self.ports.proc_packet,
                      self.global_buffer.ports.proc_packet)
            # remaining connections between controller, buffer and
            # interconnect are delegated to helpers defined elsewhere
            glb_glc_wiring(self)
            glb_interconnect_wiring(self)
            glc_interconnect_wiring(self)
        else:
            # lift all the interconnect ports up
            for name in self.interconnect.interface():
                self.add_port(name, self.interconnect.ports[name].type())
                self.wire(self.ports[name], self.interconnect.ports[name])

            # global signals are added separately from the lifted interface
            # NOTE(review): ConfigurationType receives config_data_width for
            # both arguments; both widths are 32 here so the result is the
            # same, but confirm whether the first should be the address
            # width.
            self.add_ports(
                clk=magma.In(magma.Clock),
                reset=magma.In(magma.AsyncReset),
                config=magma.In(
                    ConfigurationType(self.interconnect.config_data_width,
                                      self.interconnect.config_data_width)),
                stall=magma.In(
                    magma.Bits[self.interconnect.stall_signal_width]),
                read_config_data=magma.Out(magma.Bits[config_data_width]))

            self.wire(self.ports.clk, self.interconnect.ports.clk)
            self.wire(self.ports.reset, self.interconnect.ports.reset)

            self.wire(self.ports.config, self.interconnect.ports.config)
            self.wire(self.ports.stall, self.interconnect.ports.stall)

            self.wire(self.interconnect.ports.read_config_data,
                      self.ports.read_config_data)
Пример #11
0
def test_interconnect_sram(cw_files, add_pd, io_sides):
    """Read back preloaded data from a memory tile used as an SRAM.

    A 2x2 CGRA routes a 16-bit address input and a 1-bit read enable into
    memory tile m0 and routes its data output back out.  The memory is
    preloaded through the configuration interface with ``addr + 10`` at
    every fourth address below 1024, and each of those addresses is then
    read through the fabric.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3, add_pd=add_pd, mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in")],
        "e1": [("m0", "data_out"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    mem_x, mem_y = placement["m0"]
    # Register 0 of feature 0 is written with 0x6; per the original note
    # this puts the memory tile into sram mode.
    mode_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    config_data.append((mode_addr, 0x00000006))

    # SRAM contents: 256 addresses per feature, features numbered from 1.
    sram_data = [
        (interconnect.get_config_addr(word % 256, word // 256 + 1,
                                      mem_x, mem_y),
         word + 10)
        for word in range(0, 1024, 4)
    ]

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # Write each configuration word and confirm it reads back unchanged.
    for cfg_addr, cfg_value in config_data:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # SRAM words are written without a read-back check (the original notes
    # that read back does not currently work for them).
    for word_addr, word_value in sram_data:
        tester.configure(word_addr, word_value)

    # Top-level port names follow from the placed IO tile coordinates.
    in_x, in_y = placement["I0"]
    src = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    out_x, out_y = placement["I1"]
    dst = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"
    en_x, en_y = placement["i3"]
    ren = f"glb2io_1_X{en_x:02X}_Y{en_y:02X}"

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    # Present each preloaded address and expect its data after step(2).
    for word in range(0, 1024, 4):
        tester.poke(circuit.interface[src], word)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], word + 10)

    with tempfile.TemporaryDirectory() as build_dir:
        # Gather the external Verilog sources the simulation depends on.
        for verilog_file in glob.glob("genesis_verif/*.*"):
            shutil.copy(verilog_file, build_dir)
        for cw_file in cw_files:
            shutil.copy(cw_file, build_dir)
        stub_source = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_target = os.path.join(build_dir, "sram_512w_16b.v")
        shutil.copy(stub_source, stub_target)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=build_dir,
                               flags=["-Wno-fatal"])
def test_interconnect_fifo(dw_files, io_sides, depth):
    """Randomly exercise a memory tile configured as a FIFO.

    A 2x2 CGRA routes data, write-enable and read-enable inputs into memory
    tile m0 and routes its data, valid, empty and full outputs back out.
    For 2048 iterations a random move (read, write, read-and-write, or
    idle) is applied and the outputs are compared against a software
    ``deque`` model of the FIFO.

    The stimulus comes from a locally seeded generator so that a failing
    run can be reproduced exactly.

    Args:
        dw_files: paths of extra source files copied next to the generated
            Verilog before simulation.
        io_sides: forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the capacity of the software model.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0),
                   ("mode", mode, 0),
                   ("tile_en", tile_en, 0),
                   ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y, mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # write every configuration word and check that it reads back unchanged
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # i3 and i4 each carry one input and one output signal (see netlist),
    # so the same placement coordinate is looked up in both directions.
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # Private, seeded generator: reproducible stimulus without touching the
    # global random state used by other tests.
    rng = random.Random(0)

    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for _ in range(2048):

        # empty/full are checked against the occupancy *before* this move
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = rng.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write
            write_val = rng.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            # a write to a full FIFO is dropped by the model
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = rng.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # de-assert both enables before the next move
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        # gather the external sources the simulation depends on
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_interconnect_sram(dw_files, io_sides):
    """Preload a memory tile in SRAM mode and read every entry back.

    A 2x2 CGRA is built, the memory tile ``m0`` is configured with mode 2
    and its storage is written through the configuration interface.  Each
    of the 2048 addresses is then driven on the fabric address input and
    the data output is expected to equal that address.

    ``dw_files`` is iterated as a collection of file paths that are copied
    into the simulation directory; ``io_sides`` is forwarded to
    ``create_cgra``.
    """
    # NEW: PASSES
    # Ported from Genesis to Kratos; the flow is basically the same.
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    # address in, data out, and a 1-bit read enable
    bus = {"e0": 16, "e1": 16, "e2": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    sram_mode = 2  # Mode.SRAM
    mem_settings = [
        ("mode", sram_mode, 0),
        ("tile_en", 1, 0),
        ("flush_reg_sel", 1, 0),
    ]
    config_mem_tile(interconnect, bitstream, mem_settings,
                    mem_x, mem_y, mem_core)
    bitstream = compress_config_data(bitstream)

    # 512 words: word ``w`` sits at offset ``w % 256`` of feature
    # ``w // 256 + 1`` of the memory tile.
    sram_words = [
        (interconnect.get_config_addr(word % 256, word // 256 + 1,
                                      mem_x, mem_y),
         word)
        for word in range(512)
    ]

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    def program_and_verify():
        # write every routing/core register and read it straight back
        for cfg_addr, cfg_value in bitstream:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    program_and_verify()

    for cfg_addr, word in sram_words:
        lanes = [word * 4 + lane for lane in range(4)]
        # four consecutive writes to the same config address
        for value in lanes:
            tester.configure(cfg_addr, value)
            tester.eval()
        # NOTE(review): the original comment here said read back does not
        # work yet, but the read-back values are asserted below.
        for value in lanes:
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, value)

    # the register configuration is applied a second time after the preload
    program_and_verify()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    for address in range(2048):
        tester.poke(circuit.interface[src], address)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], address)

    with tempfile.TemporaryDirectory() as workdir:
        for path in glob.glob("genesis_verif/*.*"):
            shutil.copy(path, workdir)
        for path in dw_files:
            shutil.copy(path, workdir)
        stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(stub, os.path.join(workdir, "sram_512w_16b.v"))
        for path in glob.glob("tests/*.sv"):
            shutil.copy(path, workdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=workdir,
                               flags=["-Wno-fatal"])
Пример #14
0
    def __init__(self, width, height, add_pd, interconnect_only: bool = False,
                 use_sram_stub: bool = True, standalone: bool = False,
                 add_pond: bool = True,
                 use_io_valid: bool = False,
                 pipeline_config_interval: int = 8,
                 glb_params: GlobalBufferParams = GlobalBufferParams(),
                 pe_fc=lassen_fc):
        """Build the top-level generator around a CGRA interconnect.

        The interconnect is always created with ``create_cgra`` and then
        processed by ``stall_port_pass`` and ``config_port_pass``.  Unless
        ``interconnect_only`` is set, a ``GlobalController`` and a
        ``GlobalBufferMagma`` are instantiated as well and wired to the
        top-level JTAG, AXI4-Lite and processor-packet ports.  In
        interconnect-only mode the interconnect's own ports are lifted to
        the top level instead.

        Args:
            width: Forwarded to ``create_cgra``.  Must be even unless
                ``interconnect_only`` is set.  In interconnect-only mode it
                also sizes the top-level ``config`` array and ``stall`` bus.
            height: Forwarded to ``create_cgra``.
            add_pd: Forwarded to ``create_cgra``.
            interconnect_only: Skip the global controller / global buffer
                and expose the interconnect ports directly.
            use_sram_stub: Forwarded to ``create_cgra``.
            standalone: Requires ``interconnect_only``.  Selects
                ``IOSide.None_`` instead of ``IOSide.North`` and is
                forwarded to ``create_cgra``.
            add_pond: Forwarded to ``create_cgra``.
            use_io_valid: Forwarded to ``create_cgra``.
            pipeline_config_interval: Forwarded to ``create_cgra``.
            glb_params: Global buffer parameters; stored on the instance
                and used to size the controller, buffer and processor
                packet interface.  NOTE(review): the default instance is
                created once at function-definition time and shared by
                every call that omits this argument — confirm it is never
                mutated.
            pe_fc: Stored on the instance and forwarded to ``create_cgra``.

        Raises:
            AssertionError: If ``standalone`` is set without
                ``interconnect_only``, or (when not interconnect-only) if
                ``width`` is odd or ``glb_params.bank_addr_width`` is
                below 10.
        """
        super().__init__()

        # Check consistency of @standalone and @interconnect_only parameters. If
        # @standalone is True, then interconnect_only must also be True.
        if standalone:
            assert interconnect_only

        # configuration parameters
        self.glb_params = glb_params
        config_addr_width = 32
        config_data_width = 32
        self.config_addr_width = config_addr_width
        self.config_data_width = config_data_width
        axi_addr_width = 13
        axi_data_width = 32
        # axi_data_width must be same as cgra config_data_width
        assert axi_data_width == config_data_width

        # fixed interconnect parameters handed to create_cgra below
        tile_id_width = 16
        config_addr_reg_width = 8
        num_tracks = 5

        # size
        self.width = width
        self.height = height

        # IO is placed on the north side only; a standalone build has no
        # IO side at all
        if standalone:
            io_side = IOSide.None_
        else:
            io_side = IOSide.North

        self.pe_fc = pe_fc

        if not interconnect_only:
            # width must be even number
            assert (self.width % 2) == 0

            # Bank should be larger than or equal to 1KB
            assert glb_params.bank_addr_width >= 10

            # derived from the bank address width (relative to 2**10) plus
            # the number of bits needed to index the banks of one tile
            glb_tile_mem_size = 2 ** (glb_params.bank_addr_width - 10) + \
                math.ceil(math.log(glb_params.banks_per_tile, 2))
            wiring = GlobalSignalWiring.ParallelMeso
            self.global_controller = GlobalController(addr_width=config_addr_width,
                                                      data_width=config_data_width,
                                                      axi_addr_width=axi_addr_width,
                                                      axi_data_width=axi_data_width,
                                                      num_glb_tiles=glb_params.num_glb_tiles,
                                                      glb_addr_width=glb_params.glb_addr_width,
                                                      glb_tile_mem_size=glb_tile_mem_size,
                                                      block_axi_addr_width=glb_params.axi_addr_width)

            self.global_buffer = GlobalBufferMagma(glb_params)

        else:
            # interconnect-only builds use the plain Meso global wiring
            wiring = GlobalSignalWiring.Meso

        interconnect = create_cgra(width, height, io_side,
                                   reg_addr_width=config_addr_reg_width,
                                   config_data_width=config_data_width,
                                   tile_id_width=tile_id_width,
                                   num_tracks=num_tracks,
                                   add_pd=add_pd,
                                   add_pond=add_pond,
                                   use_io_valid=use_io_valid,
                                   use_sram_stub=use_sram_stub,
                                   global_signal_wiring=wiring,
                                   pipeline_config_interval=pipeline_config_interval,
                                   mem_ratio=(1, 4),
                                   standalone=standalone,
                                   pe_fc=pe_fc)

        self.interconnect = interconnect

        # make multiple stall ports
        stall_port_pass(self.interconnect)
        # make multiple configuration ports
        config_port_pass(self.interconnect)

        if not interconnect_only:
            # full chip: expose the external interfaces and route them to
            # the global controller / global buffer
            self.add_ports(
                jtag=JTAGType,
                clk_in=magma.In(magma.Clock),
                reset_in=magma.In(magma.AsyncReset),
                proc_packet=ProcPacketIfc(
                    glb_params.glb_addr_width, glb_params.bank_data_width).slave,
                axi4_slave=AXI4LiteIfc(axi_addr_width, axi_data_width).slave,
                interrupt=magma.Out(magma.Bit),
                cgra_running_clk_out=magma.Out(magma.Clock),
            )

            # top <-> global controller ports connection
            self.wire(self.ports.clk_in, self.global_controller.ports.clk_in)
            self.wire(self.ports.reset_in,
                      self.global_controller.ports.reset_in)
            self.wire(self.ports.jtag, self.global_controller.ports.jtag)
            self.wire(self.ports.axi4_slave,
                      self.global_controller.ports.axi4_slave)
            self.wire(self.ports.interrupt,
                      self.global_controller.ports.interrupt)
            self.wire(self.ports.cgra_running_clk_out,
                      self.global_controller.ports.clk_out)

            # top <-> global buffer ports connection
            # (the enable / valid signals are wired to bit 0 of the
            # corresponding global buffer port)
            self.wire(self.ports.clk_in, self.global_buffer.ports.clk)
            self.wire(self.ports.proc_packet.wr_en,
                      self.global_buffer.ports.proc_wr_en[0])
            self.wire(self.ports.proc_packet.wr_strb,
                      self.global_buffer.ports.proc_wr_strb)
            self.wire(self.ports.proc_packet.wr_addr,
                      self.global_buffer.ports.proc_wr_addr)
            self.wire(self.ports.proc_packet.wr_data,
                      self.global_buffer.ports.proc_wr_data)
            self.wire(self.ports.proc_packet.rd_en,
                      self.global_buffer.ports.proc_rd_en[0])
            self.wire(self.ports.proc_packet.rd_addr,
                      self.global_buffer.ports.proc_rd_addr)
            self.wire(self.ports.proc_packet.rd_data,
                      self.global_buffer.ports.proc_rd_data)
            self.wire(self.ports.proc_packet.rd_data_valid,
                      self.global_buffer.ports.proc_rd_data_valid[0])

            # Top -> Interconnect clock port connection
            self.wire(self.ports.clk_in, self.interconnect.ports.clk)

            # remaining connections between the three sub-blocks are made
            # by these helpers
            glb_glc_wiring(self)
            glb_interconnect_wiring(self)
            glc_interconnect_wiring(self)
        else:
            # lift all the interconnect ports up
            for name in self.interconnect.interface():
                self.add_port(name, self.interconnect.ports[name].type())
                self.wire(self.ports[name], self.interconnect.ports[name])

            # NOTE(review): ConfigurationType is given config_data_width for
            # both arguments; if the first one is an address width,
            # config_addr_width may be what was meant (both are 32 here) —
            # confirm.
            self.add_ports(
                clk=magma.In(magma.Clock),
                reset=magma.In(magma.AsyncReset),
                config=magma.In(magma.Array[width,
                                ConfigurationType(config_data_width,
                                                  config_data_width)]),
                stall=magma.In(
                    magma.Bits[self.width * self.interconnect.stall_signal_width]),
                read_config_data=magma.Out(magma.Bits[config_data_width])
            )

            # clock / reset straight through to the interconnect
            self.wire(self.ports.clk, self.interconnect.ports.clk)
            self.wire(self.ports.reset, self.interconnect.ports.reset)

            self.wire(self.ports.config,
                      self.interconnect.ports.config)
            self.wire(self.ports.stall,
                      self.interconnect.ports.stall)

            self.wire(self.interconnect.ports.read_config_data,
                      self.ports.read_config_data)
Пример #15
0
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check that a reset clears previously written configuration.

    A 2x2 CGRA is routed for a two-input PE netlist and the PE is loaded
    with the ``asm.umult0()`` bitstream.  Every configuration register is
    written and read back, the tester is reset, every register is expected
    to read 0, and finally the configuration is written and verified again.

    ``batch_size`` is accepted but not used by this test.  ``dw_files`` is
    iterated as a collection of file paths copied into the simulation
    directory; ``io_sides`` is forwarded to ``create_cgra``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]
    # tile id packs x above y; the register address goes in the top byte
    tile_id = (x << 8) | y
    pe_core = interconnect.tile_circuits[(x, y)].core
    for reg, value in pe_core.get_config_bitstream(asm.umult0()):
        bitstream.append(((reg << 24) | tile_id, value))
    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def write_and_check():
        # write each register and confirm it reads back unchanged
        for cfg_addr, cfg_value in bitstream:
            tester.configure(cfg_addr, cfg_value)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_value)

    tester.reset()
    write_and_check()

    # after a reset every configured address must read back as zero
    tester.reset()
    for cfg_addr, _ in bitstream:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # the same configuration can be loaded again afterwards
    write_and_check()

    with tempfile.TemporaryDirectory() as workdir:
        for path in glob.glob("genesis_verif/*.*"):
            shutil.copy(path, workdir)
        for path in dw_files:
            shutil.copy(path, workdir)
        stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(stub, os.path.join(workdir, "sram_512w_16b.v"))
        for path in glob.glob("tests/*.sv"):
            shutil.copy(path, workdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=workdir,
                               flags=["-Wno-fatal"])
Пример #16
0
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Feed a ramp through a memory tile and a PE configured with ``asm.add()``.

    The input stream goes both to the memory tile ``m0`` and directly to
    the PE; the memory output feeds the PE's second operand.  Once
    ``i > depth + 10`` the output is expected to be ``i * 2 - depth``.

    ``cw_files`` is iterated as a collection of file paths copied into the
    simulation directory; ``add_pd`` and ``io_sides`` are forwarded to
    ``create_cgra``.
    """
    depth = 10
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1}
    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")]
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # m0 is put into line buffer mode: one word at register 0, feature 0,
    # carrying the constant 0x4 with depth shifted in above bit 3
    mem_x, mem_y = placement["m0"]
    mem_cfg_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    mem_cfg_word = 0x00000004 | (depth << 3)
    bitstream.append((mem_cfg_addr, mem_cfg_word))

    # p0 is configured as an adder
    pe_x, pe_y = placement["p0"]
    tile_id = (pe_x << 8) | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    for reg, value in pe_core.get_config_bitstream(asm.add()):
        bitstream.append(((reg << 24) | tile_id, value))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # top-level port names are derived from the placed IO coordinates
    src_x, src_y = placement["I0"]
    dst_x, dst_y = placement["I1"]
    wen_x, wen_y = placement["i3"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"

    # write enable is held high for the whole run
    tester.poke(circuit.interface[wen], 1)

    first_checked = depth + 10
    for sample in range(200):
        tester.poke(circuit.interface[src], sample)
        tester.eval()

        if sample > first_checked:
            tester.expect(circuit.interface[dst], sample * 2 - depth)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as workdir:
        for path in glob.glob("genesis_verif/*.*"):
            shutil.copy(path, workdir)
        for path in cw_files:
            shutil.copy(path, workdir)
        stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(stub, os.path.join(workdir, "sram_512w_16b.v"))
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=workdir,
                               flags=["-Wno-fatal"])
Пример #17
0
def test_interconnect_sram(run_tb, io_sides):
    """Preload a memory tile in SRAM mode and read every entry back.

    A 2x2 CGRA is built, the memory tile ``m0`` is configured with mode 2
    and its storage is written through the configuration interface.  Each
    of the 2048 addresses is then driven on the fabric address input and
    the data output is expected to equal that address.

    ``run_tb`` is called with the finished tester; ``io_sides`` is
    forwarded to ``create_cgra``.
    """
    # NEW: PASSES
    # Ported from Genesis to Kratos; the flow is basically the same.
    side = 2
    interconnect = create_cgra(side, side, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    widths = {"e0": 16, "e1": 16, "e2": 1}
    nets = {
        "e0": [("I0", "io2f_16"), ("m0", "addr_in_0")],
        "e1": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e2": [("i3", "io2f_1"), ("m0", "ren_in_0")]
    }

    placement, routing = pnr(interconnect, (nets, widths))
    cfg = interconnect.get_route_bitstream(routing)

    mem_x, mem_y = placement["m0"]
    core = interconnect.tile_circuits[(mem_x, mem_y)].core
    # mode 2 is Mode.SRAM
    tile_cfg = [("mode", 2, 0), ("tile_en", 1, 0), ("flush_reg_sel", 1, 0)]
    config_mem_tile(interconnect, cfg, tile_cfg, mem_x, mem_y, core)
    cfg = compress_config_data(cfg)

    # word ``w`` (0..511) is stored at offset ``w % 256`` of feature
    # ``w // 256 + 1``
    preload = []
    for word in range(512):
        feature = word // 256 + 1
        offset = word % 256
        target = interconnect.get_config_addr(offset, feature, mem_x, mem_y)
        preload.append((target, word))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    def load_registers():
        # write each register and check the immediate read-back
        for reg_addr, reg_value in cfg:
            tester.configure(reg_addr, reg_value)
            tester.config_read(reg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, reg_value)

    load_registers()

    for target, word in preload:
        base = word * 4
        # four consecutive writes to the same config address
        for lane in range(4):
            tester.configure(target, base + lane)
            tester.eval()
        # NOTE(review): the original comment here said read back does not
        # work yet, but the read-back values are asserted below.
        for lane in range(4):
            tester.config_read(target)
            tester.eval()
            tester.expect(circuit.read_config_data, base + lane)

    # the register configuration is applied once more after the preload
    load_registers()

    tester.done_config()

    src = interconnect.get_top_input_port_by_coord(placement["I0"], 16)
    dst = interconnect.get_top_output_port_by_coord(placement["I1"], 16)
    ren = interconnect.get_top_input_port_by_coord(placement["i3"], 1)

    tester.step(2)
    tester.poke(circuit.interface[ren], 1)
    tester.eval()

    for address in range(2048):
        tester.poke(circuit.interface[src], address)
        tester.eval()
        tester.step(2)
        tester.eval()
        tester.expect(circuit.interface[dst], address)

    run_tb(tester)
Пример #18
0
def test_interconnect_point_wise(batch_size: int, cw_files, add_pd, io_sides):
    """Point-wise multiply two random input streams through a single PE.

    Two 16-bit IO inputs are routed to the PE ``p0``, which is loaded with
    the ``asm.umult0()`` bitstream, and its result is routed to an IO
    output.  ``batch_size`` pairs of values in ``[0, 256)`` are applied and
    the output is expected to be their product.

    ``cw_files`` is iterated as a collection of file paths copied into the
    simulation directory; ``add_pd`` and ``io_sides`` are forwarded to
    ``create_cgra``.
    """
    size = 2
    interconnect = create_cgra(size, size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    bus = {"e0": 16, "e1": 16, "e3": 16}
    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # load the multiplier configuration into the placed PE
    x, y = placement["p0"]
    pe_core = interconnect.tile_circuits[(x, y)].core
    for reg, value in pe_core.get_config_bitstream(asm.umult0()):
        bitstream.append((interconnect.get_config_addr(reg, 0, x, y), value))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # write every register and check the immediate read-back
    for cfg_addr, cfg_value in bitstream:
        tester.configure(cfg_addr, cfg_value)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_value)

    # top-level port names are derived from the placed IO coordinates
    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    dst_x, dst_y = placement["I2"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    # fixed seed so the stimulus is the same on every run
    random.seed(0)
    for _ in range(batch_size):
        lhs = random.randrange(0, 256)
        rhs = random.randrange(0, 256)
        tester.poke(circuit.interface[src_name0], lhs)
        tester.poke(circuit.interface[src_name1], rhs)

        # no clock step between poke and expect
        tester.eval()
        tester.expect(circuit.interface[dst_name], lhs * rhs)

    with tempfile.TemporaryDirectory() as workdir:
        for path in glob.glob("genesis_verif/*.*"):
            shutil.copy(path, workdir)
        for path in cw_files:
            shutil.copy(path, workdir)
        stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(stub, os.path.join(workdir, "sram_512w_16b.v"))
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=workdir,
                               flags=["-Wno-fatal", "--trace"])
Пример #19
0
def test_interconnect_fifo(run_tb, io_sides, depth):
    """Drive a memory tile in FIFO mode with random traffic against a model.

    A 2x2 CGRA is built and the memory tile ``m0`` is configured with
    mode 1 and the requested ``depth``.  For 2048 cycles one of four moves
    (read, write, read+write, idle) is picked at random; a ``deque`` tracks
    the expected contents.  Each cycle the ``empty``/``full`` outputs are
    checked against the model's occupancy *before* the move, ``valid`` is
    checked against whether the move returns data, and the data output is
    checked whenever ``valid`` is expected.

    The random moves come from a locally seeded generator so that a failing
    run can be reproduced.

    Args:
        run_tb: Called with the finished tester.
        io_sides: Forwarded to ``create_cgra``.
        depth: FIFO depth written to ``fifo_ctrl_fifo_depth`` and used as
            the model's capacity.
    """
    # NEW: PASSES
    # Ported from Genesis to Kratos; the flow is basically the same.

    chip_size = 2
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in_0")],
        "e1": [("i3", "io2f_1"), ("m0", "wen_in_0")],
        "e2": [("i4", "io2f_1"), ("m0", "ren_in_0")],
        "e3": [("m0", "data_out_0"), ("I1", "f2io_16")],
        "e4": [("m0", "valid_out_0"), ("i4", "f2io_1")],
        "e5": [("m0", "empty"), ("i2", "f2io_1")],
        "e6": [("m0", "full"), ("i3", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 1, "e2": 1, "e3": 16, "e4": 1, "e5": 1, "e6": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as fifo mode
    mode = 1  # Mode.FIFO
    tile_en = 1

    configs_mem = [("fifo_ctrl_fifo_depth", depth, 0), ("mode", mode, 0),
                   ("tile_en", tile_en, 0), ("flush_reg_sel", 1, 0)]
    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # i3 carries both wen (input) and full (output); i4 carries both
    # ren (input) and valid (output)
    src_coord = placement["I0"]
    src = interconnect.get_top_input_port_by_coord(src_coord, 16)
    dst_coord = placement["I1"]
    dst = interconnect.get_top_output_port_by_coord(dst_coord, 16)
    wen_coord = placement["i3"]
    wen = interconnect.get_top_input_port_by_coord(wen_coord, 1)
    valid_coord = placement["i4"]
    valid = interconnect.get_top_output_port_by_coord(valid_coord, 1)
    ren_coord = placement["i4"]
    ren = interconnect.get_top_input_port_by_coord(ren_coord, 1)
    full_coord = placement["i3"]
    full = interconnect.get_top_output_port_by_coord(full_coord, 1)
    empty_coord = placement["i2"]
    empty = interconnect.get_top_output_port_by_coord(empty_coord, 1)

    tester.step(1)

    # private, seeded generator: reproducible stimulus without touching the
    # global random state
    rng = random.Random(0)

    # software model: newest entry on the left, oldest popped from the right
    fifo = deque()
    valid_check = 0
    most_recent_read = 0
    for _ in range(2048):

        # occupancy before this cycle's move; the flags are checked
        # against this value
        len_fifo = len(fifo)

        # Pick random from (READ, WRITE, READ_AND_WRITE, IDLE)
        move = rng.randint(0, 3)
        if move == 0:
            # read
            tester.poke(circuit.interface[ren], 1)
            if fifo:
                most_recent_read = fifo.pop()
                valid_check = 1
            else:
                valid_check = 0
        elif move == 1:
            # write; dropped by the model when it is already full
            write_val = rng.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[src], write_val)
            if len(fifo) < depth:
                fifo.appendleft(write_val)
            valid_check = 0
        elif move == 2:
            # r and w
            write_val = rng.randint(0, 60000)
            tester.poke(circuit.interface[wen], 1)
            tester.poke(circuit.interface[ren], 1)
            tester.poke(circuit.interface[src], write_val)
            fifo.appendleft(write_val)
            most_recent_read = fifo.pop()
            valid_check = 1
        else:
            # If not doing anything, valid will be low, and we expect
            # to see the same output as before
            valid_check = 0
        tester.eval()

        tester.expect(circuit.interface[empty], len_fifo == 0)
        tester.expect(circuit.interface[full], len_fifo == depth)
        tester.expect(circuit.interface[valid], valid_check)
        if valid_check:
            tester.expect(circuit.interface[dst], most_recent_read)
        tester.step(2)

        # de-assert both enables before the next move
        tester.poke(circuit.interface[wen], 0)
        tester.poke(circuit.interface[ren], 0)

    run_tb(tester)
Пример #20
0
def test_stall(dw_files, io_sides):
    """Check that asserting ``stall`` freezes the data path and that it resumes.

    The input stream passes through a register, then to both a memory tile
    and a PE configured with ``asm.add(ra_mode=asm.Mode_t.DELAY)``; the
    memory output feeds the PE's second operand.  The chip is configured
    while stalled, run for ``num_cycles`` cycles, stalled again (the output
    must hold its last value while the input keeps changing), and finally
    un-stalled and checked to continue.

    Args:
        dw_files: Iterated as a collection of file paths copied into the
            simulation directory.
        io_sides: Forwarded to ``create_cgra``.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size,
                               chip_size,
                               io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    x, y = placement["p0"]

    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core

    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_merged_0",
         (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0), ("tile_en", tile_en, 0),
        ("mode", 0, 0), ("flush_reg_sel", 1, 0), ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip while it is being configured
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    # this one input drives both wen_in_0 and ren_in_0 of the memory tile
    tester.poke(circuit.interface[wen], 1)

    num_cycles = 20
    last_cycle = num_cycles - 1

    for i in range(num_cycles):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        if i >= depth + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == last_cycle:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()
        tester.step(2)

    # while stalled the output must stay at the value of the last cycle
    held_output = last_cycle * 2 - 3 - depth
    for i in range(20):
        # poke random numbers. it shouldn't matter
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], held_output)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # resume the ramp from the cycle at which the stall was asserted
    for i in range(last_cycle, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        shutil.copy(os.path.join("tests", "test_memory_core", "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
Пример #21
0
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the memory tile's ``valid_out`` over ``3 * depth`` input cycles.

    A 2x2 CGRA is built in which the 16-bit input ``I0`` feeds both the
    memory tile ``m0`` and ``data0`` of PE ``p0``; ``m0.data_out`` feeds
    ``data1`` of ``p0``, whose ALU result goes to ``I1``.  ``m0`` is
    configured in ``Mode.LINE_BUFFER`` with the given ``depth`` and
    ``stencil_width``, and ``p0`` is configured with ``asm.add()``.

    With the write-enable input held at 1, the test expects ``valid``:

    * 0 while ``i < depth + stencil_width - 1``,
    * 1 from there until ``i < 2 * depth``,
    * 0 again until ``i < 2 * depth + stencil_width - 1``,
    * 1 for the remaining cycles.

    Args:
        cw_files: iterable of file paths copied into the simulation
            directory (presumably a pytest fixture -- confirm in conftest).
        add_pd: forwarded to ``create_cgra`` as ``add_pd``.
        io_sides: forwarded to ``create_cgra`` as the IO side selection.
        stencil_width: value written to the ``stencil_width`` register.
        depth: value written to the ``depth`` register; also sets the
            number of simulated cycles (``3 * depth``).
    """
    grid_dim = 2
    interconnect = create_cgra(grid_dim, grid_dim, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")]
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # m0 is set up as a line buffer; registers are written in this order
    buffer_mode = Mode.LINE_BUFFER
    mem_x, mem_y = placement["m0"]
    mem_core = interconnect.tile_circuits[(mem_x, mem_y)].core
    mem_registers = (
        ("depth", depth),
        ("mode", buffer_mode.value),
        ("stencil_width", stencil_width),
        ("tile_en", 1),
    )
    for reg_name, reg_value in mem_registers:
        reg_addr = interconnect.get_config_addr(
            mem_core.get_reg_index(reg_name), 0, mem_x, mem_y)
        config_data.append((reg_addr, reg_value))

    # p0 is set up as an adder; the tile id occupies the low bits of the
    # config address and the register address starts at bit 24
    pe_x, pe_y = placement["p0"]
    pe_tile_id = pe_x << 8 | pe_y
    pe_core = interconnect.tile_circuits[(pe_x, pe_y)].core
    config_data.extend(
        ((reg_addr << 24) | pe_tile_id, reg_value)
        for reg_addr, reg_value in pe_core.get_config_bitstream(asm.add()))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    # write every config word and read it straight back
    for cfg_addr, cfg_word in config_data:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    # top-level port names are derived from the placed IO coordinates
    src_x, src_y = placement["I0"]
    dst_x, dst_y = placement["I1"]
    wen_x, wen_y = placement["i3"]
    valid_x, valid_y = placement["i4"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # cycle indices at which the expected valid value changes
    first_valid_start = depth + stencil_width - 1
    first_valid_end = 2 * depth
    last_valid_start = 2 * depth + stencil_width - 1

    # the same constant is driven every cycle; only valid is checked
    src_value = 0
    for cycle in range(3 * depth):
        tester.poke(circuit.interface[src], src_value)
        tester.eval()

        is_valid = cycle >= first_valid_start and (
            cycle < first_valid_end or cycle >= last_valid_start)
        tester.expect(circuit.interface[valid], 1 if is_valid else 0)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # generated RTL first, then the cw_files sources
        for rtl_path in glob.glob("genesis_verif/*.*") + list(cw_files):
            shutil.copy(rtl_path, tempdir)
        # the SRAM stub is copied under the name sram_512w_16b.v
        sram_stub = os.path.join("tests", "test_memory_core", "sram_stub.v")
        shutil.copy(sram_stub, os.path.join(tempdir, "sram_512w_16b.v"))
        for sv_path in glob.glob("tests/*.sv"):
            shutil.copy(sv_path, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])