def test_add(interconnect_route):
    """Run an add instruction on the compiled interconnect model.

    The ``interconnect_route`` fixture supplies the interconnect, the
    placement map and the routed paths.  Core ``p0`` is loaded with
    ``asm.add()`` and core ``m0`` with an SRAM-mode memory instruction
    holding ``(i, i + 42)`` pairs for ``i`` in ``range(100)``.  The same
    value is then driven on the first node of nets ``e0`` and ``e1`` while
    the first node of ``e4`` is held at 1, and the last node of ``e3`` is
    checked on every iteration except the first.
    """
    interconnect, placement, route_path = interconnect_route
    add_instr = asm.add()

    compiler = InterconnectModelCompiler(interconnect)
    compiler.configure_route(route_path)

    pe_x, pe_y = placement["p0"]
    compiler.set_core_instr(pe_x, pe_y, add_instr)

    # configure the memory
    sram_entries = [(index, index + 42) for index in range(100)]
    sram_instr = MemoryInstruction(MemoryMode.SRAM, data_entries=sram_entries)
    mem_x, mem_y = placement["m0"]
    compiler.set_core_instr(mem_x, mem_y, sram_instr)

    model = compiler.compile()

    # Inputs are the first node of each driving route; the observed node is
    # the last node of the output route.
    operand_a = route_path["e0"][0][0]
    operand_b = route_path["e1"][0][0]
    read_enable = route_path["e4"][0][0]
    result_node = route_path["e3"][0][-1]

    # set ren to high all the time
    model.set_value(read_enable, 1)
    for value in range(10):
        model.set_value(operand_a, value)
        model.set_value(operand_b, value)
        model.eval()
        if value == 0:
            # The first iteration is evaluated but not checked.
            continue
        assert model.get_value(result_node) == value + value + 42 - 2
def test_reset():
    """Check that asserting ``ASYNCRESET`` forces ``O0`` to 0.

    Two non-zero random 16-bit operands are driven into ``pe_circuit``
    configured for an add with both inputs in ``DELAY`` mode.  The sum is
    expected after one step, 0 is expected while ``ASYNCRESET`` is 1, and
    the sum is expected again two steps after the reset is released.
    """
    tester = fault.Tester(pe_circuit, clock=pe_circuit.CLK)
    add_instr = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    tester.circuit.inst = assembler(add_instr)

    # Draw each operand until it is non-zero.
    operands = []
    for _ in range(2):
        candidate = hwtypes.BitVector.random(16)
        while candidate == 0:
            candidate = hwtypes.BitVector.random(16)
        operands.append(candidate)
    lhs, rhs = operands
    expected_sum = lhs + rhs

    tester.circuit.data0 = lhs
    tester.circuit.data1 = rhs
    tester.circuit.CLK = 0
    tester.circuit.clk_en = 1
    tester.circuit.ASYNCRESET = 0
    tester.step(1)
    tester.circuit.O0.expect(expected_sum)

    # Assert the reset: the output is expected to be 0 right after eval
    # and still 0 after two more steps.
    tester.circuit.ASYNCRESET = 1
    tester.eval()
    tester.circuit.O0.expect(0)
    tester.step(2)
    tester.circuit.O0.expect(0)

    # Release the reset and expect the sum again.
    tester.circuit.ASYNCRESET = 0
    tester.step(2)
    tester.circuit.O0.expect(expected_sum)

    tester.compile_and_run(
        "verilator",
        flags=["-Wno-UNUSED", "-Wno-fatal"],
        directory="tests/build",
        magma_opts={"coreir_libs": {"float_DW"}},
    )
def test_pe_stall(dw_files):
    """Check that ``alu_res`` reads 0 while the PE core's ``stall`` is 1.

    The core is configured for an add with both operands in ``DELAY``
    mode, then ``data0``/``data1`` are driven with 1..100 and ``alu_res``
    is expected to be 0 each time.  The files in ``dw_files`` are copied
    next to the generated sources before running verilator.
    """
    core = PeakCore(gen_pe)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    tester.poke(circuit.interface["stall"], 1)

    add_instr = add(ra_mode=Mode.DELAY, rb_mode=Mode.DELAY)
    # Registers are only written here; can't read back yet.
    for reg_addr, reg_value in core.get_config_bitstream(add_instr):
        tester.configure(reg_addr, reg_value)

    for operand in range(1, 101):
        tester.poke(circuit.interface["data0"], operand)
        tester.poke(circuit.interface["data1"], operand)
        tester.eval()
        tester.expect(circuit.interface["alu_res"], 0)

    with tempfile.TemporaryDirectory() as build_dir:
        for support_file in dw_files:
            shutil.copy(support_file, build_dir)
        tester.compile_and_run(
            target="verilator",
            magma_output="coreir-verilog",
            magma_opts={"coreir_libs": {"float_DW"}},
            directory=build_dir,
            flags=["-Wno-fatal"],
        )
def write_data01(pe, data0: Data, data1: Data, instr=asm.add(), ra=Data(0)):
    """Call ``pe`` with a config write to ``DATA01_ADDR``.

    The config word is ``BitVector.concat(data0, data1)`` and ``config_en``
    is set to 1.  ``ra`` is passed as the PE's regular ``data0`` input and
    ``instr`` as its instruction.  Returns whatever ``pe`` returns.
    """
    return pe(
        instr,
        data0=ra,
        config_addr=Data8(DATA01_ADDR),
        config_data=BitVector.concat(data0, data1),
        config_en=Bit(1),
    )
def test_write_priority_data0(args):
    """Check the ordering between a config write and the ``ra`` input.

    ``args`` supplies three values: the two halves written through the
    config port and the value driven on ``ra`` during that write.
    """
    cfg_data0 = args[0]
    cfg_data1 = args[1]
    ra_value = args[2]
    delay_instr = asm.add(ra_mode=Mode_t.DELAY)

    write_data01(pe, data0=cfg_data0, data1=cfg_data1, instr=delay_instr,
                 ra=ra_value)

    # The config write takes priority over the ra input.
    assert cfg_data0 == read_data0(pe, instr=delay_instr, ra=ra_value)
    # Now the data0 register should contain the ra value (from delay).
    assert ra_value == read_data0(pe, instr=delay_instr, ra=cfg_data1)
    assert cfg_data1 == read_data0(pe, instr=delay_instr)
    # data1 should still contain the value from the first write_data01.
    assert cfg_data1 == read_data1(pe)
def test_reg_delay(args):
    """Run an add with both operands in ``DELAY`` mode through the RTL tester.

    The expected result is ``data0 + data1`` with ``delay=1``; one random
    value is supplied as ``data1_delay_values``.
    """
    lhs, rhs = args
    delayed_add = asm.add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    rhs_filler = [UIntVector.random(DATAWIDTH)]
    rtl_tester(
        delayed_add,
        lhs,
        rhs,
        res=lhs + rhs,
        delay=1,
        data1_delay_values=rhs_filler,
    )
def test_stall(args):
    """Run an add with ``clk_en=0`` through the RTL tester.

    ``ra`` is in ``BYPASS`` mode and ``rb`` in ``DELAY`` mode; the expected
    result passed to ``rtl_tester`` is ``data0``.  One random value is
    supplied as ``data1_delay_values``.
    """
    lhs, rhs = args
    stalled_add = asm.add(ra_mode=Mode_t.BYPASS, rb_mode=Mode_t.DELAY)
    rhs_filler = [UIntVector.random(DATAWIDTH)]
    rtl_tester(
        stalled_add,
        lhs,
        rhs,
        res=lhs,
        clk_en=0,
        data1_delay_values=rhs_filler,
    )
def write_bit012(pe, bit0: Bit, bit1: Bit, bit2: Bit, instr=asm.add()):
    """Call ``pe`` with a config write to ``BIT012_ADDR``.

    The config word is built by concatenating, in order, the three bits
    (each as a 1-bit vector) and a 29-bit zero vector.  ``config_en`` is
    set to 1 and the PE's ``data0`` input is ``Data(0)``.  Returns whatever
    ``pe`` returns.
    """
    one_bit = BitVector[1]
    target_addr = Data8(BIT012_ADDR)

    # Build the word left to right, one concat per field.
    word = BitVector.concat(one_bit(bit0), one_bit(bit1))
    word = BitVector.concat(word, one_bit(bit2))
    word = BitVector.concat(word, BitVector[29](0))

    enable = Bit(1)
    return pe(instr, data0=Data(0), config_addr=target_addr,
              config_data=word, config_en=enable)
def test_pe_config(dw_files):
    """Configure a ``PeakCore`` and check config read-back and outputs.

    First phase: with ``stall`` poked to 1, the core is configured with an
    add (both operands in ``DELAY`` mode) plus one extra register write
    ``(3, 0x42 << 16 | 0x42)``.  Every register is read back, and
    ``alu_res`` is expected to be ``0x42 + 0x42`` regardless of the values
    poked on ``data0``/``data1``.

    Second phase: after a reset, ``bit0`` and ``bit1`` are poked to 0 and
    ``res_p`` is expected to be 1.

    The files in ``dw_files`` are copied into a temporary build directory
    before the test bench is compiled and run with verilator.
    """
    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    # Test bench driving the core's circuit.
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    tester.reset()

    tester.poke(circuit.interface["stall"], 1)
    config_data = core.get_config_bitstream(
        add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY))
    # hacky way to configure it as 0x42 + 0x42 from the operand register
    config_data += [(3, 0x42 << 16 | 0x42)]
    for addr, data in config_data:
        print("{0:08X} {1:08X}".format(addr, data))
        # Write the register, then read it back and compare.
        tester.configure(addr, data)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, data)

    # The poked operands vary, but the expected result stays 0x42 + 0x42.
    for i in range(10):
        tester.poke(circuit.interface["data0"], i + 1)
        tester.poke(circuit.interface["data1"], i + 1)
        tester.eval()
        tester.expect(circuit.interface["alu_res"], 0x42 + 0x42)

    tester.reset()
    lut_val = lut_and().lut

    # NOTE(review): this second bitstream is built (and extended with
    # (4, 0x7)) but never passed to tester.configure, so the checks below
    # run against whatever state tester.reset() leaves behind -- confirm
    # whether a configure loop is missing here.
    config_data = core.get_config_bitstream(
        inst(alu=ALU_t.Add, lut=lut_val,
             rd_mode=Mode_t.DELAY,
             re_mode=Mode_t.DELAY,
             rf_mode=Mode_t.DELAY))
    config_data += [(4, 0x7)]
    tester.poke(circuit.interface["bit0"], 0)
    tester.poke(circuit.interface["bit1"], 0)
    tester.eval()
    tester.expect(circuit.interface["res_p"], 1)

    with tempfile.TemporaryDirectory() as tempdir:
        # Copy the extra source files next to the generated verilog.
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_pe_stall(run_tb):
    """Check that ``alu_res`` reads 0 while the PE core's ``stall`` is 1.

    The core is configured for an add with both operands in ``DELAY``
    mode, then ``data0``/``data1`` are driven with 1..100 and ``alu_res``
    is expected to be 0 each time.  The finished test bench is handed to
    the ``run_tb`` fixture.
    """
    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    tester.poke(circuit.interface["stall"], 1)

    add_instr = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    # Registers are only written here; can't read back yet.
    for reg_addr, reg_value in core.get_config_bitstream(add_instr):
        tester.configure(reg_addr, reg_value)

    for operand in range(1, 101):
        tester.poke(circuit.interface["data0"], operand)
        tester.poke(circuit.interface["data1"], operand)
        tester.eval()
        tester.expect(circuit.interface["alu_res"], 0)

    run_tb(tester)
def test_stall(dw_files, io_sides):
    """Stall a 2x2 CGRA mid-stream and check the output freezes and resumes.

    The netlist routes IO ``I0`` through register ``r1`` into both the
    memory tile ``m0`` and PE ``p0`` (``data0``); the memory output feeds
    the PE's ``data1`` and the PE's ``alu_res`` goes to IO ``I1``.  A 1-bit
    IO drives the memory's write/read enables and another observes its
    valid output.

    The test streams 20 values, pokes ``stall`` to 1 on the last of them,
    checks that the output keeps the same expected value for 20 more
    iterations while different inputs are poked, then pokes ``stall`` back
    to 0 and checks that the stream continues.

    ``dw_files`` and a set of verilog support files are copied into a
    temporary directory before compiling and running with verilator.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    # Bit width of each net.
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Configure the PE as an add with its ra operand in DELAY mode.
    x, y = placement["p0"]
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    # Memory tile configuration entries, passed to config_mem_tile below.
    # NOTE(review): each entry looks like (register name, value, feature
    # index) -- confirm against config_mem_tile.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_merged_0",
         (0 << 0) | (1 << 3) | (2 << 6) | (3 << 9), 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", tile_en, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every config register and verify it by reading it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Interface port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()

        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()

        tester.step(2)

    for i in range(20):
        # poke arbitrary numbers. it shouldn't matter: the output is
        # expected to stay at the value computed for i == 19
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # The stream is resumed from i == 19, the iteration that was stalled.
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather every extra source file in the build directory.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        # The SRAM stub is copied under the name sram_512w_16b.v.
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               magma_opts={"coreir_libs": {"float_DW"}},
                               directory=tempdir,
                               flags=["-Wno-fatal"])
def test_pond_pe_acc(run_tb):
    """Accumulate a stream through a PE whose pond output feeds it back.

    The netlist connects IO ``I0`` to the PE's ``data0``, the PE's pond
    output to its own ``data1`` and to IO ``I1``, and the PE's ``alu_res``
    to the pond input.  The PE is configured as an add.  Values 1..16 are
    poked on the source IO and, at iteration ``i``, the destination IO is
    expected to equal ``0 + 1 + ... + i``.
    """
    grid = 2
    interconnect = create_cgra(
        grid, grid, io_sides(),
        num_tracks=3,
        add_pd=True,
        add_pond=True,
        mem_ratio=(1, 2),
    )

    netlist = {
        "e0": [("I0", "io2f_16"), ("p0", "data0")],
        "e1": [("p0", "data_out_pond"), ("p0", "data1")],
        "e2": [("p0", "alu_res"), ("p0", "data_in_pond")],
        "e3": [("p0", "data_out_pond"), ("I1", "f2io_16")],
    }
    # Every net in this test is 16 bits wide.
    bus = {net: 16 for net in netlist}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    pe_x, pe_y = placement["p0"]
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    pond = pe_tile.additional_cores[0]

    # Append the PE's add configuration at chip-level addresses.
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.add()):
        chip_addr = interconnect.get_config_addr(reg_addr, 0, pe_x, pe_y)
        config_data.append((chip_addr, reg_value))

    # Ranges, Strides, Dimensionality, Starting Addr, Starting Addr - Schedule
    read_ctrl = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    write_ctrl = [[16, 1], [0, 0], 2, 8, 0, [1, 0]]
    generate_pond_api(interconnect, pond, read_ctrl, write_ctrl,
                      pe_x, pe_y, config_data)

    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.zero_inputs()
    tester.reset()

    # Configure with the chip stalled, reading every register back.
    tester.poke(circuit.interface["stall"], 1)
    for reg_addr, reg_value in config_data:
        tester.configure(reg_addr, reg_value)
        tester.config_read(reg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, reg_value)

    tester.done_config()
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    in_x, in_y = placement["I0"]
    source_port = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    out_x, out_y = placement["I1"]
    sink_port = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"

    random.seed(0)

    expected_sum = 0
    for step in range(16):
        tester.poke(circuit.interface[source_port], step + 1)
        # The expectation sums 0..step, while step + 1 is being poked.
        expected_sum += step
        tester.eval()
        tester.expect(circuit.interface[sink_port], expected_sum)
        tester.step(2)
        tester.eval()

    run_tb(tester)
def test_interconnect_line_buffer(cw_files, add_pd, io_sides):
    """Stream data through a memory tile in line-buffer mode into an add.

    IO ``I0`` feeds both the memory tile ``m0`` and the PE's ``data0``;
    the memory output feeds the PE's ``data1`` and the PE's ``alu_res``
    goes to IO ``I1``.  With the write enable held at 1, values 0..199 are
    poked and, once ``i > depth + 10``, the output is expected to be
    ``i * 2 - depth``.
    """
    depth = 10
    grid = 2
    interconnect = create_cgra(
        grid, grid, io_sides,
        num_tracks=3,
        add_pd=add_pd,
        mem_ratio=(1, 2),
    )

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
    }
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode
    mem_x, mem_y = placement["m0"]
    mem_addr = interconnect.get_config_addr(0, 0, mem_x, mem_y)
    config_data.append((mem_addr, 0x4 | (depth << 3)))

    # then p0 is configured as add
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    pe_tile = interconnect.tile_circuits[(pe_x, pe_y)]
    for reg_addr, reg_value in pe_tile.core.get_config_bitstream(asm.add()):
        config_data.append(((reg_addr << 24) | tile_id, reg_value))

    circuit = interconnect.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write every register and verify it by reading it back.
    for reg_addr, reg_value in config_data:
        tester.configure(reg_addr, reg_value)
        tester.config_read(reg_addr)
        tester.eval()
        tester.expect(circuit.read_config_data, reg_value)

    in_x, in_y = placement["I0"]
    source_port = f"glb2io_16_X{in_x:02X}_Y{in_y:02X}"
    out_x, out_y = placement["I1"]
    sink_port = f"io2glb_16_X{out_x:02X}_Y{out_y:02X}"
    en_x, en_y = placement["i3"]
    write_enable_port = f"glb2io_1_X{en_x:02X}_Y{en_y:02X}"

    tester.poke(circuit.interface[write_enable_port], 1)

    first_checked = depth + 10
    for value in range(200):
        tester.poke(circuit.interface[source_port], value)
        tester.eval()
        if value > first_checked:
            tester.expect(circuit.interface[sink_port], value * 2 - depth)
        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as build_dir:
        for generated in glob.glob("genesis_verif/*.*"):
            shutil.copy(generated, build_dir)
        for support_file in cw_files:
            shutil.copy(support_file, build_dir)
        # The SRAM stub is copied under the name sram_512w_16b.v.
        stub_source = os.path.join("tests", "test_memory_core", "sram_stub.v")
        stub_target = os.path.join(build_dir, "sram_512w_16b.v")
        shutil.copy(stub_source, stub_target)
        tester.compile_and_run(
            target="verilator",
            magma_output="coreir-verilog",
            directory=build_dir,
            flags=["-Wno-fatal"],
        )
def test_stall(run_tb, io_sides):
    """Stall a 2x2 CGRA mid-stream and check the output freezes and resumes.

    The netlist routes IO ``I0`` through register ``r1`` into both the
    memory tile ``m0`` and PE ``p0`` (``data0``); the memory output feeds
    the PE's ``data1`` and the PE's ``alu_res`` goes to IO ``I1``.  A 1-bit
    IO drives the memory's write/read enables and another observes its
    valid output.

    The test streams 20 values, pokes ``stall`` to 1 on the last of them,
    checks that the output keeps the same expected value for 20 more
    iterations while different inputs are poked, then pokes ``stall`` back
    to 0 and checks that the stream continues.  The finished test bench is
    handed to the ``run_tb`` fixture.
    """
    chip_size = 2
    depth = 10
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=True,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("r1", "reg")],
        "e2": [("r1", "reg"), ("m0", "data_in_0"), ("p0", "data0")],
        "e1": [("m0", "data_out_0"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in_0"), ("m0", "ren_in_0")],
        "e5": [("m0", "valid_out_0"), ("i4", "f2io_1")]
    }
    # Bit width of each net.
    bus = {"e0": 16, "e2": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # Configure the PE as an add with its ra operand in DELAY mode.
    x, y = placement["p0"]
    tile = interconnect.tile_circuits[(x, y)]
    add_bs = tile.core.get_config_bitstream(asm.add(ra_mode=asm.Mode_t.DELAY))
    for addr, data in add_bs:
        config_data.append((interconnect.get_config_addr(addr, 0, x, y), data))

    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    # Memory tile configuration entries, passed to config_mem_tile below.
    # NOTE(review): each entry looks like (register name, value, feature
    # index) -- confirm against config_mem_tile.
    configs_mem = [
        ("strg_ub_app_ctrl_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_output_port_0", 0, 0),
        ("strg_ub_app_ctrl_read_depth_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_wo_0", depth, 0),
        ("strg_ub_app_ctrl_write_depth_ss_0", depth, 0),
        ("strg_ub_app_ctrl_coarse_input_port_0", 0, 0),
        ("strg_ub_app_ctrl_coarse_read_depth_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_wo_0", 1, 0),
        ("strg_ub_app_ctrl_coarse_write_depth_ss_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_2", 0, 0),
        ("strg_ub_input_addr_ctrl_address_gen_0_strides_3", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_dimensionality", 2, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_0", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_ranges_1", 512, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_starting_addr", 0, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_0", 1, 0),
        ("strg_ub_output_addr_ctrl_address_gen_0_strides_1", 512, 0),
        ("strg_ub_sync_grp_sync_group_0", 1, 0),
        ("strg_ub_tba_0_tb_0_range_outer", depth, 0),
        ("strg_ub_tba_0_tb_0_starting_addr", 0, 0),
        ("strg_ub_tba_0_tb_0_stride", 1, 0),
        ("strg_ub_tba_0_tb_0_dimensionality", 1, 0),
        ("strg_ub_agg_align_0_line_length", depth, 0),
        ("strg_ub_tba_0_tb_0_indices_0", 0, 0),
        ("strg_ub_tba_0_tb_0_indices_1", 1, 0),
        ("strg_ub_tba_0_tb_0_indices_2", 2, 0),
        ("strg_ub_tba_0_tb_0_indices_3", 3, 0),
        ("strg_ub_tba_0_tb_0_range_inner", 4, 0),
        ("strg_ub_tba_0_tb_0_tb_height", 1, 0),
        ("tile_en", tile_en, 0),
        ("mode", 0, 0),
        ("flush_reg_sel", 1, 0),
        ("wen_in_1_reg_sel", 1, 0),
        ("ren_in_1_reg_sel", 1, 0)
    ]
    config_mem_tile(interconnect, config_data, configs_mem, mem_x, mem_y,
                    mcore)
    # The combined bitstream is passed through compress_config_data
    # before it is written to the chip.
    config_data = compress_config_data(config_data)

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # stall the chip
    tester.poke(circuit.interface["stall"], 1)
    tester.eval()

    # Write every config register and verify it by reading it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # un-stall the chip
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # Interface port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    for i in range(20):
        tester.poke(circuit.interface[src], i)
        tester.eval()

        if i >= 10 + 1:
            # data0 of PE: i - 1 - 1
            # data1 of PE: i - 1 - depth
            tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
            tester.expect(circuit.interface[valid], 1)
        elif i < depth:
            tester.expect(circuit.interface[valid], 0)
        if i == 19:
            # now stall everything
            tester.poke(circuit.interface["stall"], 1)
            tester.eval()

        tester.step(2)

    for i in range(20):
        # poke arbitrary numbers. it shouldn't matter: the output is
        # expected to stay at the value computed for i == 19
        tester.poke(circuit.interface[src], i * 20)
        tester.expect(circuit.interface[dst], 19 * 2 - 3 - depth)
        tester.step(2)

    # un-stall again
    tester.poke(circuit.interface["stall"], 0)
    tester.eval()

    # The stream is resumed from i == 19, the iteration that was stalled.
    for i in range(19, 30):
        tester.poke(circuit.interface[src], i)
        tester.eval()
        tester.expect(circuit.interface[dst], i * 2 - 3 - depth)
        tester.expect(circuit.interface[valid], 1)
        tester.step(2)

    run_tb(tester)
def read_bit2(pe):
    """Return bit ``BIT2_START`` of the config word read at ``BIT012_ADDR``.

    ``pe`` is called with ``asm.add()``, ``Data(0)`` as its first operand
    and ``config_addr`` set to ``BIT012_ADDR``; the third element of its
    result is the word that gets indexed.
    """
    add_instr = asm.add()
    target_addr = Data8(BIT012_ADDR)
    _res, _res_p, readback = pe(add_instr, Data(0), config_addr=target_addr)
    return readback[BIT2_START]
def test_interconnect_line_buffer_last_line_valid(cw_files, add_pd, io_sides,
                                                  stencil_width, depth):
    """Check the memory tile's valid output in line-buffer mode.

    The memory tile ``m0`` is configured through its ``depth``, ``mode``,
    ``stencil_width`` and ``tile_en`` registers and the PE ``p0`` as an
    add.  With the write enable held at 1, the test runs ``3 * depth``
    iterations and checks only the valid output, which is expected to be
    0 at the start of each of the second and third groups of ``depth``
    iterations (for ``stencil_width - 1`` iterations) and 1 for the rest
    of those groups.

    ``cw_files`` and a set of verilog support files are copied into a
    temporary directory before compiling and running with verilator.
    """
    chip_size = 2
    interconnect = create_cgra(chip_size, chip_size, io_sides,
                               num_tracks=3,
                               add_pd=add_pd,
                               mem_ratio=(1, 2))

    netlist = {
        "e0": [("I0", "io2f_16"), ("m0", "data_in"), ("p0", "data0")],
        "e1": [("m0", "data_out"), ("p0", "data1")],
        "e3": [("p0", "alu_res"), ("I1", "f2io_16")],
        "e4": [("i3", "io2f_1"), ("m0", "wen_in")],
        "e5": [("m0", "valid_out"), ("i4", "f2io_1")]
    }
    # Bit width of each net.
    bus = {"e0": 16, "e1": 16, "e3": 16, "e4": 1, "e5": 1}

    placement, routing = pnr(interconnect, (netlist, bus))
    config_data = interconnect.get_route_bitstream(routing)

    # in this case we configure m0 as line buffer mode
    mode = Mode.LINE_BUFFER
    tile_en = 1

    mem_x, mem_y = placement["m0"]
    memtile = interconnect.tile_circuits[(mem_x, mem_y)]
    mcore = memtile.core
    # Each memory register is looked up by name and written at its
    # chip-level config address.
    config_data.append(
        (interconnect.get_config_addr(mcore.get_reg_index("depth"),
                                      0, mem_x, mem_y), depth))
    config_data.append(
        (interconnect.get_config_addr(mcore.get_reg_index("mode"),
                                      0, mem_x, mem_y), mode.value))
    config_data.append(
        (interconnect.get_config_addr(mcore.get_reg_index("stencil_width"),
                                      0, mem_x, mem_y), stencil_width))
    config_data.append(
        (interconnect.get_config_addr(mcore.get_reg_index("tile_en"),
                                      0, mem_x, mem_y), tile_en))

    # then p0 is configured as add
    pe_x, pe_y = placement["p0"]
    tile_id = pe_x << 8 | pe_y
    tile = interconnect.tile_circuits[(pe_x, pe_y)]

    add_bs = tile.core.get_config_bitstream(asm.add())
    for addr, data in add_bs:
        # NOTE(review): the PE address is assembled by hand here, while
        # the memory registers above use interconnect.get_config_addr --
        # confirm the two agree.
        config_data.append(((addr << 24) | tile_id, data))

    circuit = interconnect.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()

    # Write every config register and verify it by reading it back.
    for addr, index in config_data:
        tester.configure(addr, index)
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, index)

    # Interface port names are derived from the placed IO coordinates.
    src_x, src_y = placement["I0"]
    src = f"glb2io_16_X{src_x:02X}_Y{src_y:02X}"
    dst_x, dst_y = placement["I1"]
    dst = f"io2glb_16_X{dst_x:02X}_Y{dst_y:02X}"
    wen_x, wen_y = placement["i3"]
    wen = f"glb2io_1_X{wen_x:02X}_Y{wen_y:02X}"
    valid_x, valid_y = placement["i4"]
    valid = f"io2glb_1_X{valid_x:02X}_Y{valid_y:02X}"

    tester.poke(circuit.interface[wen], 1)

    # NOTE(review): counter is never incremented, so 0 is poked on every
    # iteration; only the valid signal is checked below, so this may be
    # intentional -- confirm.
    counter = 0
    for i in range(3 * depth):
        tester.poke(circuit.interface[src], counter)
        tester.eval()

        if i < depth + stencil_width - 1:
            tester.expect(circuit.interface[valid], 0)
        elif i < 2 * depth:
            tester.expect(circuit.interface[valid], 1)
        elif i < 2 * depth + stencil_width - 1:
            tester.expect(circuit.interface[valid], 0)
        else:
            tester.expect(circuit.interface[valid], 1)

        # toggle the clock
        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        # Gather every extra source file in the build directory.
        for genesis_verilog in glob.glob("genesis_verif/*.*"):
            shutil.copy(genesis_verilog, tempdir)
        for filename in cw_files:
            shutil.copy(filename, tempdir)
        # The SRAM stub is copied under the name sram_512w_16b.v.
        shutil.copy(os.path.join("tests", "test_memory_core",
                                 "sram_stub.v"),
                    os.path.join(tempdir, "sram_512w_16b.v"))
        for aoi_mux in glob.glob("tests/*.sv"):
            shutil.copy(aoi_mux, tempdir)
        tester.compile_and_run(target="verilator",
                               magma_output="coreir-verilog",
                               directory=tempdir,
                               flags=["-Wno-fatal", "--trace"])
def read_data1(pe):
    """Return the ``data1`` field of the config word read at ``DATA01_ADDR``.

    ``pe`` is called with ``asm.add()``, ``Data(0)`` as its first operand
    and ``config_addr`` set to ``DATA01_ADDR``; the slice
    ``[DATA1_START, DATA1_START + DATA1_WIDTH)`` of the third element of
    its result is returned.
    """
    add_instr = asm.add()
    target_addr = Data8(DATA01_ADDR)
    _res, _res_p, readback = pe(add_instr, Data(0), config_addr=target_addr)
    field_end = DATA1_START + DATA1_WIDTH
    return readback[DATA1_START:field_end]
def read_data0(pe, instr=asm.add(), ra=Data(0)):
    """Return the ``data0`` field of the config word read at ``DATA01_ADDR``.

    ``pe`` is called with ``instr``, ``ra`` as its ``data0`` input and
    ``config_addr`` set to ``DATA01_ADDR``; the slice
    ``[DATA0_START, DATA0_START + DATA0_WIDTH)`` of the third element of
    its result is returned.
    """
    target_addr = Data8(DATA01_ADDR)
    _res, _res_p, readback = pe(instr, data0=ra, config_addr=target_addr)
    field_end = DATA0_START + DATA0_WIDTH
    return readback[DATA0_START:field_end]
# Python-family instances shared by the tests in this file.
PE = PE_fc(PyFamily())
pe = PE()
BFloat16 = BFloat16_fc(PyFamily())
Data = BitVector[DATAWIDTH]

# Pairs an assembled instruction with a reference implementation.
op = namedtuple("op", ["inst", "func"])
# Number of random operand tuples generated per parametrized test.
NTESTS = 4

# Instructions under test, each with the expression it must match.
_UNSIGNED_BINARY_OPS = [
    op(asm.and_(), lambda a, b: a & b),
    op(asm.or_(), lambda a, b: a | b),
    op(asm.xor(), lambda a, b: a ^ b),
    op(asm.add(), lambda a, b: a + b),
    op(asm.sub(), lambda a, b: a - b),
    op(asm.lsl(), lambda a, b: a << b),
    op(asm.lsr(), lambda a, b: a >> b),
    op(asm.umin(), lambda a, b: (a < b).ite(a, b)),
    op(asm.umax(), lambda a, b: (a > b).ite(a, b)),
]

# Random unsigned operand pairs, drawn once at import time.
_UNSIGNED_OPERAND_PAIRS = [
    (UIntVector.random(DATAWIDTH), UIntVector.random(DATAWIDTH))
    for _ in range(NTESTS)
]


@pytest.mark.parametrize("op", _UNSIGNED_BINARY_OPS)
@pytest.mark.parametrize("args", _UNSIGNED_OPERAND_PAIRS)
def test_unsigned_binary(op, args):
    """Check an unsigned binary instruction against its reference function.

    The first output of the Python PE must equal ``op.func`` applied to
    the operands; the same operands and result are then passed to
    ``rtl_tester``.
    """
    lhs, rhs = args
    result, _, _ = pe(op.inst, Data(lhs), Data(rhs))
    assert result == op.func(lhs, rhs)
    rtl_tester(op, lhs, rhs, res=result)
def __call__(self, in0: Data, in1: Data, in2: Data) -> Data:
    # Chains two PEs: pe1 runs ``asm.smult0()`` on (in0, in1) and pe2 runs
    # ``asm.add()`` on pe1's first output and in2.  Only the first element
    # of each PE's three-element result is used; the other two are dropped.
    inst1 = asm.smult0()
    inst2 = asm.add()
    pe1_out, _, _ = self.pe1(inst1, in0, in1)
    pe2_out, _, _ = self.pe2(inst2, pe1_out, in2)
    # The result is the first output of the second PE.
    return pe2_out
def test_reg_const(args):
    """Run an add whose ``rb`` operand is in ``CONST`` mode.

    ``args`` supplies ``data0`` and the constant.  A random value is still
    passed as ``data1``, but the expected result handed to ``rtl_tester``
    is ``data0 + const``.
    """
    lhs, constant = args
    ignored_rhs = UIntVector.random(DATAWIDTH)
    const_add = asm.add(rb_mode=Mode_t.CONST, rb_const=constant)
    rtl_tester(const_add, lhs, ignored_rhs, res=lhs + constant)