def test_pe_stall(dw_files):
    """Check that alu_res reads 0 while the PE core's stall input is high.

    The core is configured for ``add`` with both operand registers in
    DELAY mode, then 100 distinct input pairs are driven while ``stall``
    stays asserted. The recorded test is compiled and run with verilator
    in a temporary directory that also holds copies of ``dw_files``.
    """
    pe_core = PeakCore(gen_pe)
    pe_core.name = lambda: "PECore"
    dut = pe_core.circuit()

    tester = BasicTester(dut, dut.clk, dut.reset)
    tester.reset()

    # Assert stall before any configuration or data is applied.
    tester.poke(dut.interface["stall"], 1)

    instruction = add(ra_mode=Mode.DELAY, rb_mode=Mode.DELAY)
    # Configuration is write-only here; it can't be read back yet.
    for addr, data in pe_core.get_config_bitstream(instruction):
        tester.configure(addr, data)

    # Drive 1..100 on both data inputs; the result must stay at zero.
    for value in range(1, 101):
        for port_name in ("data0", "data1"):
            tester.poke(dut.interface[port_name], value)
        tester.eval()
        tester.expect(dut.interface["alu_res"], 0)

    with tempfile.TemporaryDirectory() as tempdir:
        # The simulation needs the DW source files next to the generated RTL.
        for dw_file in dw_files:
            shutil.copy(dw_file, tempdir)
        tester.compile_and_run(
            target="verilator",
            magma_output="coreir-verilog",
            magma_opts={"coreir_libs": {"float_DW"}},
            directory=tempdir,
            flags=["-Wno-fatal"],
        )
def test_pe_data_gate(op, run_tb):
    """Check ALU results and that the other expensive units see zero inputs.

    ``op`` selects an entry of ``_EXPENSIVE`` (instructions, functional-unit
    attribute name, value type). For every instruction of that entry, 100
    random operand pairs are streamed through the core; ``alu_res`` must
    match the functional model, and the ``I0``/``I1`` inputs of every
    functional unit belonging to the *other* ``_EXPENSIVE`` entries must
    read 0. The recorded test is executed through ``run_tb``.
    """
    instrs, fu, BV = _EXPENSIVE[op]
    is_float = issubclass(BV, hwtypes.FPVector)
    if not irun_available() and is_float:
        pytest.skip("Need irun to test fp ops")
    # bfloat16 multiply is skipped because the CW BFloat model is faulty.
    if op == "bfloat16.mul":
        pytest.skip("We don't have correct CW BFloat implementation yet")

    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    alu = tester.circuit.WrappedPE_inst0.PE_inst0.ALU_inst0
    # The unit under test is resolved here although nothing below reads it.
    fu = getattr(alu, fu)
    idle_unit_names = {
        _EXPENSIVE[name][1] for name in _EXPENSIVE if name != op
    }
    idle_units = [getattr(alu, name) for name in idle_unit_names]

    def _run_instruction(instr):
        # Start from zeroed inputs and a reset core, then load the bitstream.
        tester.zero_inputs()
        tester.reset()
        for addr, data in core.get_config_bitstream(instr):
            tester.configure(addr, data)

        # Stream random operands and check every cycle.
        for _ in range(100):
            a = _make_random(BV)
            b = _make_random(BV)
            tester.poke(circuit.data0, a)
            tester.poke(circuit.data1, b)
            tester.eval()
            expected, _unused1, _unused2 = core.wrapper.model(instr, a, b)
            tester.expect(circuit.alu_res, expected)
            for unit in idle_units:
                tester.expect(unit.I0, 0)
                tester.expect(unit.I1, 0)

    for instr in instrs:
        _run_instruction(instr)

    # Without irun the test bench is run with verilator_debug enabled.
    extra_opts = {} if irun_available() else {"verilator_debug": True}
    run_tb(tester, **extra_opts)
def test_peak_core_sequence(sequence, run_tb):
    """
    Core level test
    * builds a PeakCore circuit and wraps it in a BasicSequenceTester
    * feeds the tester the ``sequence`` of test vectors
    * runs the resulting test bench through ``run_tb``

    Driving the inputs and checking the outputs are delegated to the
    ``CoreDriver`` and ``CoreMonitor`` instances handed to the tester.
    """
    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    tester = BasicSequenceTester(circuit, CoreDriver(), CoreMonitor(),
                                 sequence, circuit.clk, circuit.reset)
    tester.reset()

    run_tb(tester)
def sequence():
    """
    Build five random test vectors for the PE core.

    Returns a list of 4-tuples (config_data, a, b, output):
    * config_data - bitstream configuring the core for a randomly chosen
      instruction (add or sub, both operand registers in bypass mode)
    * a, b - random 16-bit input values (intended for data0 and data1)
    * output - first element of the functional model's result for a, b
    """
    core = PeakCore(PE_fc)

    def _random_vector():
        # Choose a random operation from lassen.asm.
        operation = random.choice([add, sub])
        # Encode it, using bypass registers for now.
        instruction = operation(ra_mode=Mode_t.BYPASS,
                                rb_mode=Mode_t.BYPASS)
        # Convert to bitstream format.
        config_data = core.get_config_bitstream(instruction)
        # Random operands, drawn in the order a then b.
        a = BitVector.random(16)
        b = BitVector.random(16)
        # Expected result from the functional model.
        output = core.wrapper.model(instruction, a, b)[0]
        return config_data, a, b, output

    return [_random_vector() for _ in range(5)]
def test_pe_stall(run_tb):
    """Check that alu_res reads 0 while the PE core's stall input is high.

    The core is configured for ``add`` with both operand registers in
    DELAY mode, then 100 distinct input pairs are driven while ``stall``
    stays asserted. The recorded test is executed through ``run_tb``.
    """
    pe_core = PeakCore(PE_fc)
    pe_core.name = lambda: "PECore"
    dut = pe_core.circuit()

    tester = BasicTester(dut, dut.clk, dut.reset)
    tester.reset()

    # Assert stall before any configuration or data is applied.
    tester.poke(dut.interface["stall"], 1)

    instruction = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    # Configuration is write-only here; it can't be read back yet.
    for addr, data in pe_core.get_config_bitstream(instruction):
        tester.configure(addr, data)

    # Drive 1..100 on both data inputs; the result must stay at zero.
    for value in range(1, 101):
        for port_name in ("data0", "data1"):
            tester.poke(dut.interface[port_name], value)
        tester.eval()
        tester.expect(dut.interface["alu_res"], 0)

    run_tb(tester)
def test_pe_config(dw_files):
    """Exercise configuration write/read-back on the PE core.

    Phase 1 writes an ``add`` bitstream (operand registers in DELAY mode)
    plus an extra word at address 3, reads every word back through
    ``read_config_data``, and then expects ``alu_res`` to equal
    0x42 + 0x42 for ten different data inputs while ``stall`` is high.

    Phase 2 resets the tester, pokes ``bit0`` and ``bit1`` to 0 and expects
    ``res_p`` to be 1.

    The recorded test is compiled and run with verilator in a temporary
    directory that also holds copies of ``dw_files``.
    """
    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()

    tester = BasicTester(circuit, circuit.clk, circuit.reset)
    tester.reset()
    tester.poke(circuit.interface["stall"], 1)

    operand = 0x42
    config_data = core.get_config_bitstream(
        add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY))
    # Hacky way to configure it as 0x42 + 0x42 from the operand register:
    # address 3 receives the value packed twice into one word.
    config_data += [(3, (operand << 16) | operand)]

    word_format = "{0:08X} {1:08X}"
    for addr, data in config_data:
        print(word_format.format(addr, data))
        tester.configure(addr, data)
        # Read the word straight back and compare.
        tester.config_read(addr)
        tester.eval()
        tester.expect(circuit.read_config_data, data)

    # The data inputs change every step, yet the sum must stay constant.
    for value in range(1, 11):
        for port_name in ("data0", "data1"):
            tester.poke(circuit.interface[port_name], value)
        tester.eval()
        tester.expect(circuit.interface["alu_res"], 2 * operand)

    tester.reset()
    lut_val = lut_and().lut

    # NOTE(review): this second bitstream is built and extended but never
    # written to the tester - confirm whether a configure loop is missing.
    config_data = core.get_config_bitstream(
        inst(alu=ALU_t.Add, lut=lut_val,
             rd_mode=Mode_t.DELAY,
             re_mode=Mode_t.DELAY,
             rf_mode=Mode_t.DELAY))
    config_data += [(4, 0x7)]

    for port_name in ("bit0", "bit1"):
        tester.poke(circuit.interface[port_name], 0)
    tester.eval()
    tester.expect(circuit.interface["res_p"], 1)

    with tempfile.TemporaryDirectory() as tempdir:
        # The simulation needs the DW source files next to the generated RTL.
        for dw_file in dw_files:
            shutil.copy(dw_file, tempdir)
        tester.compile_and_run(
            target="verilator",
            magma_output="coreir-verilog",
            magma_opts={"coreir_libs": {"float_DW"}},
            directory=tempdir,
            flags=["-Wno-fatal"],
        )
def create_cgra(width: int, height: int, io_sides: IOSide,
                add_reg: bool = True,
                mem_ratio: Tuple[int, int] = (1, 4),
                reg_addr_width: int = 8,
                config_data_width: int = 32,
                tile_id_width: int = 16,
                num_tracks: int = 5,
                add_pd: bool = True,
                use_sram_stub: bool = True,
                hi_lo_tile_id: bool = True,
                pass_through_clk: bool = True,
                global_signal_wiring: GlobalSignalWiring =
                GlobalSignalWiring.Meso,
                standalone: bool = False,
                switchbox_type: SwitchBoxType = SwitchBoxType.Imran,
                port_conn_override: Dict[str,
                                         List[Tuple[SwitchBoxSide,
                                                    SwitchBoxIO]]] = None):
    """Build and return the CGRA interconnect.

    Args:
        width, height: requested array size; the actual size is obtained
            from ``get_actual_size`` together with ``io_sides``.
        io_sides: forwarded to the size helpers, the interconnect builder
            and the global wiring passes.
        add_reg: when False, no pipeline registers are requested.
        mem_ratio: the last element is the column period; a column whose
            offset from ``x_min`` modulo that period is at least
            ``period - mem_ratio[0]`` holds MemCores, every other interior
            column holds PeakCores.
        reg_addr_width, config_data_width, tile_id_width: forwarded to
            ``Interconnect``.
        num_tracks: number of tracks (track length is fixed to 1).
        add_pd: when True, ``add_power_domain`` is applied before
            ``finalize`` and ``add_aon_read_config_data`` after wiring.
        use_sram_stub: forwarded to ``MemCore``.
        hi_lo_tile_id: when True, ``tile_id_physical`` is applied.
        pass_through_clk: when True, ``clk_physical`` is applied last.
        global_signal_wiring: selects the global wiring pass.
        standalone: forwarded to ``Interconnect`` as ``lift_ports``.
        switchbox_type: forwarded to ``create_uniform_interconnect``.
        port_conn_override: optional entries that replace or extend the
            default port-to-switchbox connection map.

    Returns:
        The finalized ``Interconnect``.
    """
    # Compute the actual size, then the inclusive bounds of the inner array.
    width, height = get_actual_size(width, height, io_sides)
    x_min, x_max, y_min, y_max = get_array_size(width, height, io_sides)

    # Memory/PE column ratio.
    tile_max = mem_ratio[-1]
    mem_tile_ratio = tile_max - mem_ratio[0]

    def _pick_core(x, y):
        # Decide which core (if any) sits at tile (x, y).
        in_x_margin = x < x_min or x > x_max
        in_y_margin = y < y_min or y > y_max
        if in_x_margin and in_y_margin:
            # Empty corner.
            return None
        if in_x_margin or in_y_margin:
            return IOCore()
        if (x - x_min) % tile_max >= mem_tile_ratio:
            return MemCore(use_sram_stub=use_sram_stub)
        return PeakCore(PE_fc)

    # All cores are created once here so the per-bit-width interconnect
    # graphs share them instead of getting duplicates.
    cores = {(x, y): _pick_core(x, y)
             for x in range(width)
             for y in range(height)}

    def create_core(xx: int, yy: int):
        return cores[(xx, yy)]

    # Collect the port names of every core that is neither empty nor IO.
    inputs = set()
    outputs = set()
    fabric_cores = [candidate for candidate in cores.values()
                    if candidate is not None
                    and not isinstance(candidate, IOCore)]
    for fabric_core in fabric_cores:
        inputs.update(port.qualified_name()
                      for port in fabric_core.inputs())
        outputs.update(port.qualified_name()
                       for port in fabric_core.outputs())

    # This is slightly different from the original CGRA: every input is
    # connected to every SB_IN and every output to every SB_OUT.
    in_conn = [(side, SwitchBoxIO.SB_IN) for side in SwitchBoxSide]
    out_conn = [(side, SwitchBoxIO.SB_OUT) for side in SwitchBoxSide]
    port_conns = dict.fromkeys(inputs, in_conn)
    port_conns.update(dict.fromkeys(outputs, out_conn))
    if port_conn_override is not None:
        port_conns.update(port_conn_override)

    # One pipeline register per (track, side), unless register mode is off.
    pipeline_regs = []
    if add_reg:
        pipeline_regs = [(track, side)
                         for track in range(num_tracks)
                         for side in SwitchBoxSide]

    track_list = list(range(num_tracks))
    io_in = {"f2io_1": [0], "f2io_16": [0]}
    io_out = {"io2f_1": track_list, "io2f_16": track_list}

    # Currently only 1-bit and 16-bit layers are built.
    bit_widths = [1, 16]
    track_length = 1
    ics = {}
    for bit_width in bit_widths:
        io_conn = (None if io_sides & IOSide.None_
                   else {"in": io_in, "out": io_out})
        ics[bit_width] = create_uniform_interconnect(
            width, height, bit_width,
            create_core,
            port_conns,
            {track_length: num_tracks},
            switchbox_type,
            pipeline_regs,
            io_sides=io_sides,
            io_conn=io_conn)

    interconnect = Interconnect(ics, reg_addr_width, config_data_width,
                                tile_id_width,
                                lift_ports=standalone,
                                stall_signal_width=1)
    if hi_lo_tile_id:
        tile_id_physical(interconnect)
    if add_pd:
        add_power_domain(interconnect)

    interconnect.finalize()

    # NOTE(review): ParallelMeso currently gets the same wiring pass as
    # Meso - confirm this is intended.
    if global_signal_wiring in (GlobalSignalWiring.Meso,
                                GlobalSignalWiring.ParallelMeso):
        apply_global_meso_wiring(interconnect, io_sides=io_sides)
    elif global_signal_wiring == GlobalSignalWiring.Fanout:
        apply_global_fanout_wiring(interconnect, io_sides=io_sides)

    if add_pd:
        add_aon_read_config_data(interconnect)
    if pass_through_clk:
        clk_physical(interconnect)

    return interconnect
def test_pe_data_gate(op, dw_files):
    """Check ALU results and that the other expensive units see zero inputs.

    ``op`` selects an entry of ``_EXPENSIVE`` (instructions, functional-unit
    attribute name, value type). For every instruction of that entry, 100
    random operand pairs are streamed through the core; ``alu_res`` must
    match the functional model, and the ``I0``/``I1`` inputs of every
    functional unit belonging to the *other* ``_EXPENSIVE`` entries must
    read 0.

    Floating-point entries are simulated with ncsim using sources taken
    from ``_CAD_DIR``; all other entries run on verilator with copies of
    ``dw_files`` placed in the working directory.
    """
    instrs, fu, BV = _EXPENSIVE[op]
    is_float = issubclass(BV, hwtypes.FPVector)
    if not irun_available() and is_float:
        pytest.skip("Need irun to test fp ops")

    core = PeakCore(PE_fc)
    core.name = lambda: "PECore"
    circuit = core.circuit()
    tester = BasicTester(circuit, circuit.clk, circuit.reset)

    alu = tester.circuit.WrappedPE_inst0.PE_inst0.ALU_inst0.ALU_comb_inst0
    # The unit under test is resolved here although nothing below reads it.
    fu = getattr(alu, fu)
    idle_unit_names = {
        _EXPENSIVE[name][1] for name in _EXPENSIVE if name != op
    }
    idle_units = [getattr(alu, name) for name in idle_unit_names]

    def _run_instruction(instr):
        # Reset the core, then load the instruction bitstream.
        tester.reset()
        for addr, data in core.get_config_bitstream(instr):
            tester.configure(addr, data)

        # Stream random operands and check every cycle.
        for _ in range(100):
            a = _make_random(BV)
            b = _make_random(BV)
            tester.poke(circuit.data0, a)
            tester.poke(circuit.data1, b)
            tester.eval()
            expected, _unused1, _unused2 = core.wrapper.model(instr, a, b)
            tester.expect(circuit.alu_res, expected)
            for unit in idle_units:
                tester.expect(unit.I0, 0)
                tester.expect(unit.I1, 0)

    for instr in instrs:
        _run_instruction(instr)

    with tempfile.TemporaryDirectory() as tempdir:
        if is_float:
            assert os.path.isdir(_CAD_DIR)
            # Same file names as dw_files plus the add/sub unit, but
            # resolved inside the CAD directory.
            src_names = [os.path.basename(path) for path in dw_files]
            src_names.append("DW_fp_addsub.v")
            ext_srcs = [os.path.join(_CAD_DIR, name) for name in src_names]
            tester.compile_and_run(
                target="system-verilog",
                simulator="ncsim",
                magma_output="coreir-verilog",
                ext_srcs=ext_srcs,
                magma_opts={"coreir_libs": {"float_DW"}},
                directory=tempdir,
            )
        else:
            for dw_file in dw_files:
                shutil.copy(dw_file, tempdir)
            tester.compile_and_run(
                target="verilator",
                magma_output="coreir-verilog",
                magma_opts={"coreir_libs": {"float_DW"},
                            "verilator_debug": True},
                directory=tempdir,
                flags=["-Wno-fatal"],
            )