def insert_valid(id_to_name, netlist, bus):
    """Make sure the design has a 1-bit ``valid`` IO, adding one if absent.

    Every net whose ``bus`` width is 1 is scanned.  If any pin after the
    first one belongs to an IO block (id starting with ``i`` or ``I``) whose
    name contains ``"valid"``, nothing is changed.  Otherwise a PE block named
    ``always_valid`` and an IO block named ``io1_valid`` are registered and
    joined by a new 1-bit net (``res_p`` -> ``f2io_1``).

    Args:
        id_to_name: dict of block id -> block name; updated in place.
        netlist: dict of net id -> list of ``(blk_id, port)`` pins.  The
            first pin of each net is skipped by the scan (presumably it is
            the driver -- confirm against the netlist producer).  Updated in
            place.
        bus: dict of net id -> bus width; updated in place.

    Returns:
        None.
    """
    for net_id, pins in netlist.items():
        if bus[net_id] == 1:
            for sink_id, _port in pins[1:]:
                sink_name = id_to_name[sink_id]
                if sink_id[0] in {"i", "I"} and "valid" in sink_name:
                    # a valid IO is already wired up; leave the design alone
                    return

    # need to insert the const 1 bit net as well
    alu_instr, _ = __get_alu_mapping("add")
    lut = __get_lut_mapping("lutFF")
    # NOTE(review): the instruction is built but never stored anywhere (this
    # function has no access to an instance -> instruction table); confirm
    # whether the new PE is supposed to be configured by the caller.
    inst(alu_instr, cond=Cond.LUT, lut=lut)

    # adding a new pe block, the net that leaves it and the IO it feeds
    pe_id = get_new_id("p", len(id_to_name), id_to_name)
    valid_net_id = get_new_id("e", len(netlist), netlist)
    io_id = get_new_id("i", len(id_to_name), id_to_name)

    netlist[valid_net_id] = [(pe_id, "res_p"), (io_id, "f2io_1")]
    id_to_name[pe_id] = "always_valid"
    id_to_name[io_id] = "io1_valid"
    bus[valid_net_id] = 1
    print("inserting net", valid_net_id, netlist[valid_net_id])
def test_pe_config(dw_files):
    """Configure a ``PeakCore`` PE over its config port and check its outputs.

    The test stalls the core, writes the bitstream for an ``add`` instruction
    with both operands in ``DELAY`` mode, reads every config word back, and
    then checks that ``alu_res`` stays at ``0x42 + 0x42`` while ``data0`` /
    ``data1`` are poked with the values 1..10.  After a second reset it pokes
    ``bit0``/``bit1`` low and expects ``res_p`` to be 1.  Finally the recorded
    actions are compiled and run with verilator in a temporary directory.

    Args:
        dw_files: iterable of file paths that are copied into the simulation
            directory before the run.
    """
    pe_core = PeakCore(PE_fc)
    pe_core.name = lambda: "PECore"
    dut = pe_core.circuit()

    # random test stuff
    tester = BasicTester(dut, dut.clk, dut.reset)
    tester.reset()
    tester.poke(dut.interface["stall"], 1)

    add_instr = add(ra_mode=Mode_t.DELAY, rb_mode=Mode_t.DELAY)
    config_data = pe_core.get_config_bitstream(add_instr)
    # hacky way to configure it as 0x42 + 0x42 from the operand register
    operand = 0x42
    config_data += [(3, operand << 16 | operand)]
    for addr, data in config_data:
        print("{0:08X} {1:08X}".format(addr, data))
        tester.configure(addr, data)
        # read the word straight back to confirm the write landed
        tester.config_read(addr)
        tester.eval()
        tester.expect(dut.read_config_data, data)

    # the data inputs change every step but the result must not
    expected_sum = operand + operand
    for stimulus in range(1, 11):
        tester.poke(dut.interface["data0"], stimulus)
        tester.poke(dut.interface["data1"], stimulus)
        tester.eval()
        tester.expect(dut.interface["alu_res"], expected_sum)

    tester.reset()
    lut_val = lut_and().lut
    delayed_bits = {
        mode: Mode_t.DELAY for mode in ("rd_mode", "re_mode", "rf_mode")
    }
    lut_instr = inst(alu=ALU_t.Add, lut=lut_val, **delayed_bits)
    config_data = pe_core.get_config_bitstream(lut_instr)
    config_data += [(4, 0x7)]
    # NOTE(review): this second bitstream is built but never sent through
    # tester.configure(); confirm whether the LUT check below is meant to run
    # against the post-reset configuration.
    for bit_port in ("bit0", "bit1"):
        tester.poke(dut.interface[bit_port], 0)
    tester.eval()
    tester.expect(dut.interface["res_p"], 1)

    with tempfile.TemporaryDirectory() as tempdir:
        for filename in dw_files:
            shutil.copy(filename, tempdir)
        tester.compile_and_run(
            target="verilator",
            magma_output="coreir-verilog",
            magma_opts={"coreir_libs": {"float_DW"}},
            directory=tempdir,
            flags=["-Wno-fatal"],
        )
def _find_valid_io_sink(id_to_name, netlist, bus):
    """Return ``(pins, position, blk_id, port)`` of the first valid-IO sink.

    Only nets whose ``bus`` width is 1 are considered and the first pin of
    each net is skipped.  ``position`` is the index of the sink inside
    ``pins``.  Returns ``None`` when no such sink exists.
    """
    for net_id, pins in netlist.items():
        if bus[net_id] != 1:
            continue
        for position, (blk_id, port) in enumerate(pins[1:], start=1):
            blk_name = id_to_name[blk_id]
            if blk_id[0] in {"i", "I"} and "valid" in blk_name:
                return pins, position, blk_id, port
    return None


def insert_valid_delay(id_to_name, instance_to_instr, netlist, bus):
    """Put a reset mux and a delay stage in front of the design's valid IO.

    The first IO pin (block id starting with ``i``/``I``, name containing
    ``"valid"``) found on a 1-bit net is taken off its net and replaced there
    by the ``bit0`` input of a new PE named ``reset_valid_reg``.  That PE's
    ``res_p`` feeds ``bit0`` of a second new PE named ``reg_valid_delay``,
    whose ``res_p`` in turn drives the original IO pin through a new net.
    The block returned by ``insert_reset(id_to_name)`` is attached to ``bit2``
    of ``reset_valid_reg``, reusing the net whose first pin belongs to that
    block or creating one (port ``io2f_1``) if none exists.

    Args:
        id_to_name: dict of block id -> block name; updated in place.
        instance_to_instr: dict of block name -> instruction; receives the
            instructions of the two new PEs.
        netlist: dict of net id -> list of ``(blk_id, port)`` pins; updated
            in place.
        bus: dict of net id -> bus width; updated in place.

    Raises:
        AssertionError: if no valid IO pin is found on any 1-bit net.
    """
    io_valid = None
    new_reg_id = None

    hit = _find_valid_io_sink(id_to_name, netlist, bus)
    if hit is not None:
        valid_net, sink_pos, io_blk_id, io_port = hit
        io_valid = (io_blk_id, io_port)

        # we have to create two new nets; first the delaying PE.
        # this is a lut as well with delay on one side
        new_reg_id = get_new_id("p", len(id_to_name), id_to_name)
        id_to_name[new_reg_id] = "reg_valid_delay"
        alu_instr, _ = __get_alu_mapping("add")
        instance_to_instr[id_to_name[new_reg_id]] = inst(
            alu_instr, cond=Cond.LUT, lut=B0, rd_mode=Mode.DELAY)

        # add a mux to the valid output: the LUT picks B1 when B2 is set and
        # B0 otherwise (re is tied to constant 0 -- presumably the B1 input)
        new_pe_id = get_new_id("p", len(id_to_name), id_to_name)
        id_to_name[new_pe_id] = "reset_valid_reg"
        alu_instr, _ = __get_alu_mapping("add")
        instance_to_instr[id_to_name[new_pe_id]] = inst(
            alu_instr,
            cond=Cond.LUT,
            lut=(B2 & B1) | ((~B2) & B0),
            re_mode=Mode.CONST,
            re_const=0)

        # mux output -> delay PE, and the mux takes the IO's old place
        mux_net_id = get_new_id("e", len(netlist), netlist)
        netlist[mux_net_id] = [(new_pe_id, "res_p"), (new_reg_id, "bit0")]
        bus[mux_net_id] = 1
        valid_net[sink_pos] = (new_pe_id, "bit0")

        # find the reset net, creating it when the reset block drives nothing
        reset_blk_id = insert_reset(id_to_name)
        reset_net_id = next(
            (candidate for candidate, pins in netlist.items()
             if pins[0][0] == reset_blk_id),
            None)
        if reset_net_id is None:
            reset_net_id = get_new_id("e", len(netlist), netlist)
            netlist[reset_net_id] = [(reset_blk_id, "io2f_1")]
            bus[reset_net_id] = 1
        netlist[reset_net_id].append((new_pe_id, "bit2"))

    assert io_valid is not None
    assert new_reg_id is not None

    # finally connect the delay PE to the original valid IO pin
    delay_net_id = get_new_id("e", len(netlist), netlist)
    print("adding delay reg net", delay_net_id)
    netlist[delay_net_id] = [(new_reg_id, "res_p"), io_valid]
    bus[delay_net_id] = 1
# Comparison tile ops and the name of the ``Cond`` member each one selects.
# Names (not members) are stored so a member is only looked up when its op
# is actually encountered.
_COND_NAME_BY_OP = {
    "uge": "UGE",
    "ule": "ULE",
    "ugt": "UGT",
    "ult": "ULT",
    "sge": "SGE",
    "sle": "SLE",
    "sgt": "SGT",
    "slt": "SLT",
    "eq": "Z",
    "neq": "Z_n",
}


def _pin_mode(pin_name):
    """Translate a pin description into a ``(Mode, constant)`` pair.

    ``"wire"`` and ``"reg"`` map to ``Mode.BYPASS`` / ``Mode.DELAY`` with a
    constant of 0; anything else must contain ``"const"`` and carries its
    value after the last underscore.
    """
    if pin_name == "wire":
        return Mode.BYPASS, 0
    if pin_name == "reg":
        return Mode.DELAY, 0
    assert "const" in pin_name
    return Mode.CONST, int(pin_name.split("_")[-1])


def _mem_instr(tile_op, blk_id, netlist, id_to_name, bus, instance_to_instr):
    """Build the instruction dict for a memory tile op.

    ``tile_op`` is split on ``"_"``; its second field selects the memory mode
    (``lb``, ``sram`` or ``ub``).  Any other mode yields an empty dict.
    Buffers deeper than 512 are handed to ``split_ub`` and marked as chained.
    """
    args = tile_op.split("_")
    mem_mode = args[1]
    instr = {}
    if mem_mode == "lb":
        instr["mode"] = MemoryMode.DB
        instr["depth"] = int(args[-1])
        if instr["depth"] > 512:
            # split_ub must see the instruction before the chain fields are set
            split_ub(blk_id, netlist, id_to_name, bus, instance_to_instr,
                     instr)
            instr["chain_en"] = 1
            instr["chain_idx"] = 0
    elif mem_mode == "sram":
        instr["mode"] = MemoryMode.SRAM
        instr["content"] = json.loads(args[-1])
    elif mem_mode == "ub":
        instr["is_ub"] = True
        instr["mode"] = MemoryMode.DB
        # the JSON payload may itself contain underscores, so re-join it
        instr.update(json.loads("_".join(args[2:])))
        if instr["depth"] > 512:
            _, idx = split_ub(blk_id, netlist, id_to_name, bus,
                              instance_to_instr, instr)
            instr["chain_en"] = 1
            instr["chain_idx"] = idx
    return instr


def _pe_instr(tile_op, pins):
    """Build the PE instruction for a non-memory tile op from its pin modes."""
    ra_mode, ra_value = _pin_mode(pins[0])
    rb_mode, rb_value = _pin_mode(pins[1])
    kargs = {
        "ra_mode": ra_mode,
        "rb_mode": rb_mode,
        "ra_const": ra_value,
        "rb_const": rb_value,
    }
    if len(pins) > 2 and "lut" not in tile_op:
        # it's a mux
        rd_mode, rd_value = _pin_mode(pins[2])
        kargs["rd_mode"] = rd_mode
        kargs["rd_const"] = rd_value

    if tile_op[:3] == "lut":
        alu_instr, signed = __get_alu_mapping("add")
        lut = __get_lut_mapping(tile_op)
        kargs["cond"] = Cond.LUT
        kargs["lut"] = lut
        # lut has different mode names
        # this is fine because we never do packing for different widths
        rd_mode, rd_value = _pin_mode(pins[0])
        re_mode, re_value = _pin_mode(pins[1])
        rf_mode, rf_value = _pin_mode(pins[2])
        kargs["rd_mode"] = rd_mode
        kargs["re_mode"] = re_mode
        kargs["rf_mode"] = rf_mode
        kargs["rd_const"] = rd_value
        kargs["re_const"] = re_value
        kargs["rf_const"] = rf_value
    else:
        alu_instr, signed = __get_alu_mapping(tile_op)

    # comparison ops additionally select which flag becomes the condition
    cond_name = _COND_NAME_BY_OP.get(tile_op)
    if cond_name is not None:
        kargs["cond"] = getattr(Cond, cond_name)
    kargs["signed"] = signed
    return inst(alu_instr, **kargs)


def map_app(pre_map):
    """Turn a pre-mapped netlist file into a netlist plus tile instructions.

    The file is parsed and packed (with register folding), bus widths are
    derived, and an instruction is built for every instance that is neither
    folded nor absorbed and has a tile op.  Afterwards ports are renamed, a
    valid IO is ensured (with a delayed valid when the design has a ROM),
    reset is wired to flush and dead registers are removed.

    ``pre_map`` is read directly; earlier revisions created an unused
    temporary file around the parsing step, which has been dropped.

    Args:
        pre_map: path of the netlist file, passed to
            ``parse_and_pack_netlist`` and ``read_netlist_json``.

    Returns:
        Tuple ``(id_to_name, instance_to_instr, netlist, bus)``.

    Raises:
        KeyError: if an instance name has no entry in ``id_to_name``.
    """
    netlist, folded_blocks, id_to_name, changed_pe = \
        parse_and_pack_netlist(pre_map, fold_reg=True)
    rename_id_changed(id_to_name, changed_pe)
    bus = determine_track_bus(netlist, id_to_name)
    blks = get_blks(netlist)
    connections, instances = read_netlist_json(pre_map)

    name_to_id = {blk_name: blk_id for blk_id, blk_name in id_to_name.items()}

    instance_to_instr = {}
    for name, instance in instances.items():
        blk_id = name_to_id[name]
        # skip blocks that were folded or might be absorbed already
        if blk_id in folded_blocks or blk_id not in blks:
            continue
        # find out the PE type
        tile_op, _ = get_tile_op(instance, blk_id, changed_pe)
        if tile_op is None:
            continue
        pins = get_tile_pins(blk_id, tile_op, folded_blocks, instances,
                             changed_pe, id_to_name, connections)
        if "mem" in tile_op:
            instr = _mem_instr(tile_op, blk_id, netlist, id_to_name, bus,
                               instance_to_instr)
        else:
            instr = _pe_instr(tile_op, pins)
        instance_to_instr[name] = instr

    netlist = port_rename(netlist)
    insert_valid(id_to_name, netlist, bus)
    if has_rom(id_to_name):
        insert_valid_delay(id_to_name, instance_to_instr, netlist, bus)
    wire_reset_to_flush(netlist, id_to_name, bus)
    remove_dead_regs(netlist, bus)
    return id_to_name, instance_to_instr, netlist, bus
def __call__(self, in0 : Data32, in1 : Data32) -> Data32:
    # Subtracts in1 from in0 in two halves: bits [:16] go through
    # self.pe_lsb, bits [16:] through self.pe_msb, and the two partial
    # results are concatenated (lower half first) into the returned Data32.

    # lower half: Sub with the condition set to Cond_t.C (presumably the
    # carry flag -- confirm); its second result is forwarded to the upper half
    low_half, carry, _ = self.pe_lsb(
        asm.inst(asm.ALU_t.Sub, cond=asm.Cond_t.C),
        data0=in0[:16],
        data1=in1[:16],
    )
    # upper half: the asm.sbc() instruction, with the bit from the lower
    # half fed in through bit0
    high_half, _, _ = self.pe_msb(
        asm.sbc(),
        data0=in0[16:],
        data1=in1[16:],
        bit0=carry,
    )
    return Data32.concat(low_half, high_half)