def test_interconnect_reset(batch_size: int, run_tb, io_sides):
    """Check that resetting the tester clears the configured registers.

    A CGRA is created with ``chip_size`` 2 in both dimensions, a three-net
    netlist (two IO inputs feeding ``p0``, ``p0``'s ``alu_res`` feeding a
    third IO) is placed and routed, and the ``asm.umult0()`` core bitstream
    is added for the tile hosting ``p0``.  The test then:

    1. resets, writes every configuration word and reads it back,
    2. resets again and expects every one of those addresses to read 0,
    3. writes and verifies the same configuration a second time.

    ``batch_size`` is accepted but not used in this test.  ``io_sides`` is
    forwarded to ``create_cgra`` and the finished tester is handed to
    ``run_tb``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE core configuration; the register address goes in the
    # bits from 24 upwards and the tile id (x << 8 | y) in the low bits.
    pe_x, pe_y = placement["p0"]
    pe_tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    core_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        bitstream.append(((reg_addr << 24) | pe_tile_id, reg_value))
    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def program_and_verify():
        # Write each word, then read the same address back and expect
        # the value that was just written.
        for cfg_addr, cfg_word in bitstream:
            tester.configure(cfg_addr, cfg_word)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_word)

    tester.reset()
    program_and_verify()

    # After a reset, every address written above must read back as 0.
    tester.reset()
    for cfg_addr, _ in bitstream:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # The same configuration must be accepted again after the reset.
    program_and_verify()

    run_tb(tester)
def test_umult(args):
    """Check the three ``umult`` instructions against a reference product.

    ``args`` is unpacked into the two operands ``x`` and ``y``.  The
    reference is the product of both operands after ``zext(DATAWIDTH)``.
    Each instruction is expected to return one ``DATAWIDTH``-wide window of
    that product:

    * ``umult0`` -- bits ``[0:DATAWIDTH]``
    * ``umult1`` -- bits ``[DATAWIDTH // 2:DATAWIDTH // 2 + DATAWIDTH]``
    * ``umult2`` -- bits ``[DATAWIDTH:]``

    Every result is checked against ``pe`` and then passed to
    ``rtl_tester``.
    """
    # Build all three instructions before touching the operands, in the
    # same order as they are exercised below.
    instructions = (asm.umult0(), asm.umult1(), asm.umult2())

    x, y = args
    reference = x.zext(DATAWIDTH) * y.zext(DATAWIDTH)

    half_width = DATAWIDTH // 2
    windows = (
        slice(0, DATAWIDTH),
        slice(half_width, half_width + DATAWIDTH),
        slice(DATAWIDTH, None),
    )

    for instr, window in zip(instructions, windows):
        res, _, _ = pe(instr, Data(x), Data(y))
        assert res == reference[window]
        rtl_tester(instr, x, y, res=res)
def test_interconnect_point_wise(batch_size: int, cw_files, add_pd, io_sides):
    """Feed random operand pairs through a routed PE and expect the product.

    A CGRA is created with ``chip_size`` 2 in both dimensions.  Two IO
    inputs are routed to ``data0``/``data1`` of ``p0`` and ``p0``'s
    ``alu_res`` is routed to a third IO.  The tile hosting ``p0`` receives
    the ``asm.umult0()`` core bitstream.  After the configuration has been
    written and read back, ``batch_size`` random operand pairs are poked
    and the output port is expected to carry ``num_1 * num_2``.

    The testbench is compiled and run with verilator inside a temporary
    directory populated with the ``genesis_verif`` files, the ``cw_files``
    and an SRAM stub.  ``add_pd`` and ``io_sides`` are forwarded to
    ``create_cgra``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE core configuration to the routing bitstream.
    x, y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(x, y)]
    core_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, x, y)
        bitstream.append((full_addr, reg_value))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write every configuration word and read it straight back.
    for cfg_addr, cfg_word in bitstream:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    # Top-level port names are derived from the placed IO coordinates.
    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    in_port0 = circuit.interface[src_name0]
    in_port1 = circuit.interface[src_name1]
    out_port = circuit.interface[dst_name]

    # Fixed seed keeps the stimulus reproducible.  Operands are drawn from
    # [0, 256), so every product is below 2**16.
    random.seed(0)
    for _ in range(batch_size):
        num_1 = random.randrange(0, 256)
        num_2 = random.randrange(0, 256)
        tester.poke(in_port0, num_1)
        tester.poke(in_port1, num_2)

        tester.eval()
        tester.expect(out_port, num_1 * num_2)

    with tempfile.TemporaryDirectory() as tempdir:
        for generated_rtl in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated_rtl, tempdir)
        for library_file in cw_files:
            shutil.copy(library_file, tempdir)
        # The stub is copied under the file name "sram_512w_16b.v".
        stub_source = os.path.join("tests", "test_memory_core",
                                   "sram_stub.v")
        stub_target = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_source, stub_target)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
def test_interconnect_reset(batch_size: int, dw_files, io_sides):
    """Check that resetting the tester clears the configured registers.

    A CGRA is created with ``chip_size`` 2 in both dimensions, a three-net
    netlist (two IO inputs feeding ``p0``, ``p0``'s ``alu_res`` feeding a
    third IO) is placed and routed, and the ``asm.umult0()`` core bitstream
    is added for the tile hosting ``p0``.  The test then:

    1. resets, writes every configuration word and reads it back,
    2. resets again and expects every one of those addresses to read 0,
    3. writes and verifies the same configuration a second time.

    The testbench is compiled and run with verilator inside a temporary
    directory populated with the ``genesis_verif`` files, the ``dw_files``,
    an SRAM stub and the ``tests/*.sv`` files.  ``batch_size`` is accepted
    but not used in this test; ``io_sides`` is forwarded to
    ``create_cgra``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE core configuration; the register address goes in the
    # bits from 24 upwards and the tile id (x << 8 | y) in the low bits.
    pe_x, pe_y = placement["p0"]
    pe_tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    core_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        bitstream.append(((reg_addr << 24) | pe_tile_id, reg_value))
    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    def program_and_verify():
        # Write each word, then read the same address back and expect
        # the value that was just written.
        for cfg_addr, cfg_word in bitstream:
            tester.configure(cfg_addr, cfg_word)
            tester.config_read(cfg_addr)
            tester.eval()
            tester.expect(circuit.read_config_data, cfg_word)

    tester.reset()
    program_and_verify()

    # After a reset, every address written above must read back as 0.
    tester.reset()
    for cfg_addr, _ in bitstream:
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, 0)

    # The same configuration must be accepted again after the reset.
    program_and_verify()

    with tempfile.TemporaryDirectory() as tempdir:
        for generated_rtl in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated_rtl, tempdir)
        for library_file in dw_files:
            shutil.copy(library_file, tempdir)
        # The stub is copied under the file name "sram_512w_16b.v".
        stub_source = os.path.join("tests", "test_memory_core",
                                   "sram_stub.v")
        stub_target = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_source, stub_target)
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def _make_random(cls):
    """Return a random value for the hwtypes vector class ``cls``.

    * For a subclass of ``hwtypes.BitVector`` the result is
      ``cls.random(len(cls))``.
    * For a subclass of ``hwtypes.FPVector`` random values are drawn until
      one reports ``fp_is_normal()``; that value is returned after
      ``reinterpret_as_bv()``.

    Raises:
        NotImplementedError: if ``cls`` is neither kind of vector class.
            The ``NotImplemented`` singleton is meant for binary special
            methods; returning it from a plain helper would hand callers a
            bogus "value" instead of failing at the point of misuse.
    """
    if issubclass(cls, hwtypes.BitVector):
        return cls.random(len(cls))
    if issubclass(cls, hwtypes.FPVector):
        # Keep drawing until a value passes fp_is_normal().
        while True:
            val = cls.random()
            if val.fp_is_normal():
                return val.reinterpret_as_bv()
    raise NotImplementedError(
        f"cannot generate a random value for {cls!r}")


_CAD_DIR = "/cad/synopsys/syn/P-2019.03/dw/sim_ver/"

# Per-op test data, unpacked in test_pe_data_gate as (instrs, fu, BV):
# a tuple of instructions, a name string, and a hwtypes vector class.
# NOTE(review): the string looks like the instance name of the functional
# unit in the generated circuit -- confirm against the part of the test
# that consumes it.
_EXPENSIVE = {
    "bits32.mul": (
        (umult0(),),
        "magma_Bits_32_mul_inst0",
        hwtypes.UIntVector[16],
    ),
    "bfloat16.mul": (
        (fp_mul(),),
        "magma_BFloat_16_mul_inst0",
        BFloat16_fc(PyFamily()),
    ),
    "bfloat16.add": (
        (fp_add(),),
        "magma_BFloat_16_add_inst0",
        BFloat16_fc(PyFamily()),
    ),
}


@pytest.mark.parametrize("op", list(_EXPENSIVE.keys()))
def test_pe_data_gate(op, dw_files):
    """Set up the PE core for one entry of ``_EXPENSIVE``.

    Floating-point entries are skipped when ``irun_available()`` is false.

    NOTE(review): only the setup is visible here -- ``instrs``, ``fu`` and
    ``dw_files`` are not used in this body.  Confirm that the remainder of
    the test was not lost.
    """
    instrs, fu, BV = _EXPENSIVE[op]

    is_float = issubclass(BV, hwtypes.FPVector)
    if not irun_available() and is_float:
        pytest.skip("Need irun to test fp ops")

    core = PeakCore(PE_fc)
    # Override the name() method so it returns the fixed name "PECore".
    core.name = lambda: "PECore"
def test_pond_pe(verilator=True):
    """Stream values through the pond of a PE tile and multiply them.

    A CGRA is created with ``chip_size`` 2 in both dimensions and
    ``add_pond=True``.  IO ``I0`` feeds ``data_in_pond`` of ``p0``,
    ``data_out_pond`` of ``p0`` is routed back to its ``data0``, IO ``I1``
    feeds ``data1`` and ``alu_res`` goes to IO ``I2``.  The PE core gets
    the ``asm.umult0()`` bitstream and the pond is configured through
    ``generate_pond_api``.

    For the first 16 iterations the loop index is poked on the first
    input; for the next 16 a random value is poked on the second input
    and the output is expected to equal ``(i - 16) * num``.

    Args:
        verilator: when it is exactly ``False`` the testbench is run with
            target ``"system-verilog"`` and simulator ``"vcs"``; otherwise
            the target is ``"verilator"``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data_in_pond")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("I2", "f2io_16")],
        "e3": [("p0", "data_out_pond"), ("p0", "data0")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    petile = interconnect.tile_circuits[(pe_x, pe_y)]
    pondcore = petile.additional_cores[0]

    # PE core configuration.
    core_bitstream = petile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((full_addr, reg_value))

    # Ranges, Strides, Dimensionality, Starting Addr, Starting Addr - Schedule
    ctrl_rd = [[16, 1], [1, 1], 2, 0, 16]
    ctrl_wr = [[16, 1], [1, 1], 2, 0, 0]
    generate_pond_api(interconnect, pondcore, ctrl_rd, ctrl_wr,
                      pe_x, pe_y, bitstream)

    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    # "stall" is held at 1 while the configuration is written.
    tester.poke(circuit.interface["stall"], 1)
    for cfg_addr, cfg_word in bitstream:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)
    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from the placed IO coordinates.
    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    random.seed(0)
    for i in range(32):
        if i < 16:
            # First half: poke the loop index on the first input.
            tester.poke(circuit.interface[src_name0], i)
            tester.eval()
        else:
            # Second half: poke a random operand on the second input and
            # expect its product with (i - 16).
            num = random.randrange(0, 256)
            tester.poke(circuit.interface[src_name1], num)
            tester.eval()
            tester.expect(circuit.interface[dst_name], (i - 16) * num)
        tester.step(2)
        tester.eval()

    with tempfile.TemporaryDirectory() as tempdir:
        for generated_rtl in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated_rtl, tempdir)
        for library_file in dw_files():
            shutil.copy(library_file, tempdir)
        # The stub is copied under the file name "sram_512w_16b.v".
        stub_source = os.path.join("tests", "test_memory_core",
                                   "sram_stub.v")
        stub_target = os.path.join(tempdir, "sram_512w_16b.v")
        shutil.copy(stub_source, stub_target)
        for sv_file in glob.glob("tests/*.sv"):
            shutil.copy(sv_file, tempdir)

        runtime_kwargs = {
            "magma_output": "coreir-verilog",
            "magma_opts": {"coreir_libs": {"float_DW"}},
            "directory": tempdir,
            "flags": ["-Wno-fatal", "--trace"],
        }
        # Only an explicit False selects the VCS flow.
        # NOTE(review): the same "flags" list is passed on both paths --
        # confirm these flags are valid for the vcs simulator.
        use_vcs = verilator is False
        if use_vcs:
            runtime_kwargs["simulator"] = "vcs"
        target = "system-verilog" if use_vcs else "verilator"

        tester.compile_and_run(target=target,
                               tmp_dir=False,
                               **runtime_kwargs)
def _make_random(cls):
    """Return a random value for the hwtypes vector class ``cls``.

    * For a subclass of ``hwtypes.BitVector`` the result is
      ``cls.random(len(cls))``.
    * For a subclass of ``hwtypes.FPVector`` random values are drawn until
      one reports ``fp_is_normal()``; that value is returned after
      ``reinterpret_as_bv()``.

    Raises:
        NotImplementedError: if ``cls`` is neither kind of vector class.
            The ``NotImplemented`` singleton is meant for binary special
            methods; returning it from a plain helper would hand callers a
            bogus "value" instead of failing at the point of misuse.
    """
    if issubclass(cls, hwtypes.BitVector):
        return cls.random(len(cls))
    if issubclass(cls, hwtypes.FPVector):
        # Keep drawing until a value passes fp_is_normal().
        while True:
            val = cls.random()
            if val.fp_is_normal():
                return val.reinterpret_as_bv()
    raise NotImplementedError(
        f"cannot generate a random value for {cls!r}")


# Per-op test data, unpacked in test_pe_data_gate as (instrs, fu, BV):
# a tuple of instructions, a name string, and a hwtypes vector class.
# NOTE(review): the string looks like the instance name of the functional
# unit in the generated circuit -- confirm against the part of the test
# that consumes it.
_EXPENSIVE = {
    "bits32.mul": (
        (umult0(),),
        "magma_UInt_32_mul_inst0",
        hwtypes.UIntVector[16],
    ),
    "bfloat16.mul": (
        (fp_mul(),),
        "magma_BFloat_16_mul_inst0",
        BFloat16_fc(PyFamily()),
    ),
    "bfloat16.add": (
        (fp_add(),),
        "magma_BFloat_16_add_inst0",
        BFloat16_fc(PyFamily()),
    ),
}


@pytest.mark.parametrize("op", list(_EXPENSIVE.keys()))
def test_pe_data_gate(op, run_tb):
    """Look up one entry of ``_EXPENSIVE`` and skip it if it cannot run.

    Floating-point entries are skipped when ``irun_available()`` is false.

    NOTE(review): only the skip logic is visible here -- ``instrs``, ``fu``
    and ``run_tb`` are not used in this body.  Confirm that the remainder
    of the test was not lost.
    """
    instrs, fu, BV = _EXPENSIVE[op]

    is_float = issubclass(BV, hwtypes.FPVector)
    if not irun_available() and is_float:
        pytest.skip("Need irun to test fp ops")
def test_interconnect_point_wise(batch_size: int, run_tb, io_sides):
    """Feed random operand pairs through a routed PE and expect the product.

    A CGRA is created with ``chip_size`` 2 in both dimensions.  Two IO
    inputs are routed to ``data0``/``data1`` of ``p0`` and ``p0``'s
    ``alu_res`` is routed to a third IO.  The tile hosting ``p0`` receives
    the ``asm.umult0()`` core bitstream.  After the compressed
    configuration has been written and read back, ``batch_size`` random
    operand pairs are poked and the output port is expected to carry
    ``num_1 * num_2``.

    ``io_sides`` is forwarded to ``create_cgra`` and the finished tester is
    handed to ``run_tb``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I2", "f2io_16")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    # Append the PE core configuration to the routing bitstream.
    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    core_bitstream = pe_tile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((full_addr, reg_value))
    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Write every configuration word and read it straight back.
    for cfg_addr, cfg_word in bitstream:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)

    tester.done_config()

    # Ask the interconnect for the top-level port names of the placed IOs.
    first_src = placement["I0"]
    second_src = placement["I1"]
    src_name0 = interconnect.get_top_input_port_by_coord(first_src, 16)
    src_name1 = interconnect.get_top_input_port_by_coord(second_src, 16)
    sink = placement["I2"]
    dst_name = interconnect.get_top_output_port_by_coord(sink, 16)
    in_port0 = circuit.interface[src_name0]
    in_port1 = circuit.interface[src_name1]
    out_port = circuit.interface[dst_name]

    # Fixed seed keeps the stimulus reproducible.  Operands are drawn from
    # [0, 256), so every product is below 2**16.
    random.seed(0)
    for _ in range(batch_size):
        num_1 = random.randrange(0, 256)
        num_2 = random.randrange(0, 256)
        tester.poke(in_port0, num_1)
        tester.poke(in_port1, num_2)

        tester.eval()
        tester.expect(out_port, num_1 * num_2)

    run_tb(tester)
def test_pond_pe(run_tb):
    """Stream values through the pond of a PE tile and multiply them.

    A CGRA is created with ``chip_size`` 2 in both dimensions and
    ``add_pond=True``.  IO ``I0`` feeds ``data_in_pond`` of ``p0``,
    ``data_out_pond`` of ``p0`` is routed back to its ``data0``, IO ``I1``
    feeds ``data1`` and ``alu_res`` goes to IO ``I2``.  The PE core gets
    the ``asm.umult0()`` bitstream and the pond is configured through
    ``generate_pond_api``.

    For the first 16 iterations the loop index is poked on the first
    input; for the next 16 a random value is poked on the second input
    and the output is expected to equal ``(i - 16) * num``.  The finished
    tester is handed to ``run_tb``.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides(),
                               num_tracks=3,
                               add_pd=True,
                               add_pond=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data_in_pond")],
        "e1": [("I1", "io2f_16"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("I2", "f2io_16")],
        "e3": [("p0", "data_out_pond"), ("p0", "data0")]
    }
    bus = {"e0": 16, "e1": 16, "e2": 16, "e3": 16}

    placement, routing = pnr(interconnect, (netlist, bus))
    bitstream = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    petile = interconnect.tile_circuits[(pe_x, pe_y)]
    pondcore = petile.additional_cores[0]

    # PE core configuration.
    core_bitstream = petile.core.get_config_bitstream(asm.umult0())
    for reg_addr, reg_value in core_bitstream:
        full_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        bitstream.append((full_addr, reg_value))

    # Ranges, Strides, Dimensionality, Starting Addr, Starting Addr - Schedule
    # NOTE(review): each list carries a sixth entry ([1, 1]) that the
    # legend above does not name -- confirm its meaning against
    # generate_pond_api.
    ctrl_rd = [[16, 1], [1, 1], 2, 0, 16, [1, 1]]
    ctrl_wr = [[16, 1], [1, 1], 2, 0, 0, [1, 1]]
    generate_pond_api(interconnect, pondcore, ctrl_rd, ctrl_wr,
                      pe_x, pe_y, bitstream)

    bitstream = compress_config_data(bitstream)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # "stall" is held at 1 while the configuration is written.
    tester.poke(circuit.interface["stall"], 1)
    for cfg_addr, cfg_word in bitstream:
        tester.configure(cfg_addr, cfg_word)
        tester.config_read(cfg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, cfg_word)
    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Top-level port names are derived from the placed IO coordinates.
    src_x0, src_y0 = placement["I0"]
    src_x1, src_y1 = placement["I1"]
    src_name0 = f"glb2io_16_X{src_x0:02X}_Y{src_y0:02X}"
    src_name1 = f"glb2io_16_X{src_x1:02X}_Y{src_y1:02X}"
    dst_x, dst_y = placement["I2"]
    dst_name = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"

    random.seed(0)
    for i in range(32):
        if i < 16:
            # First half: poke the loop index on the first input.
            tester.poke(circuit.interface[src_name0], i)
            tester.eval()
        else:
            # Second half: poke a random operand on the second input and
            # expect its product with (i - 16).
            num = random.randrange(0, 256)
            tester.poke(circuit.interface[src_name1], num)
            tester.eval()
            tester.expect(circuit.interface[dst_name], (i - 16) * num)
        tester.step(2)
        tester.eval()

    run_tb(tester)