def test_build_dataflow_directory():
    """Run build_dataflow_directory on the packaged example and check its outputs.

    The example data shipped under ``finn.qnn-data/build_dataflow/`` is copied
    into a fresh build directory, the build is launched on that copy, and the
    test then asserts that every expected output file exists.
    """
    build_root = make_build_dir("test_build_dataflow_directory_")
    target_dir = build_root + "/build_dataflow"
    # copy the packaged example data into the fresh build directory
    copytree(pk.resource_filename("finn.qnn-data", "build_dataflow/"), target_dir)
    build_dataflow_directory(target_dir)

    # check the generated files
    output_dir = target_dir + "/output_tfc_w1a1_Pynq-Z1"
    expected_outputs = (
        "/time_per_step.json",
        "/final_hw_config.json",
        "/stitched_ip/ip/component.xml",
        "/driver/driver.py",
        "/report/estimate_layer_cycles.json",
        "/report/estimate_layer_resources.json",
        "/report/estimate_layer_config_alternatives.json",
        "/report/estimate_network_performance.json",
        "/report/ooc_synth_and_timing.json",
        "/report/rtlsim_performance.json",
        "/bitfile/finn-accel.bit",
        "/bitfile/finn-accel.hwh",
        "/report/post_synth_resources.xml",
        "/report/post_route_timing.rpt",
    )
    for rel_path in expected_outputs:
        assert os.path.isfile(output_dir + rel_path)

    # verification outputs
    verify_out_dir = output_dir + "/verification_output"
    expected_verification = (
        "/verify_initial_python_SUCCESS.npy",
        "/verify_streamlined_python_SUCCESS.npy",
        "/verify_folded_hls_cppsim_SUCCESS.npy",
        "/verify_stitched_ip_rtlsim_SUCCESS.npy",
    )
    for rel_path in expected_verification:
        assert os.path.isfile(verify_out_dir + rel_path)
def pyverilate_stitched_ip(model):
    """Given a model with stitched IP, return a PyVerilator sim object.

    Reads the list of Verilog sources from ``all_verilog_srcs.txt`` inside the
    directory named by the ``vivado_stitch_proj`` metadata property, and builds
    a PyVerilator simulation whose top module is derived from the
    ``wrapper_filename`` metadata property.

    Raises:
        ImportError: if PyVerilator is not available.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
        all_verilog_srcs = f.read().split()

    def file_to_dir(x):
        return os.path.dirname(os.path.realpath(x))

    def file_to_basename(x):
        return os.path.basename(os.path.realpath(x))

    all_verilog_dirs = list(map(file_to_dir, all_verilog_srcs))
    # only plain Verilog files are passed on; the set removes duplicate names
    all_verilog_files = list(
        {
            basename
            for basename in map(file_to_basename, all_verilog_srcs)
            if basename.endswith(".v")
        }
    )
    top_module_name = file_to_basename(model.get_metadata_prop("wrapper_filename"))
    # Drop only the ".v" extension. str.strip(".v") treats its argument as a
    # character set and would also eat leading/trailing 'v' and '.' characters
    # of the module name itself (e.g. "vivado_wrapper.v" -> "ivado_wrapper").
    if top_module_name.endswith(".v"):
        top_module_name = top_module_name[: -len(".v")]
    build_dir = make_build_dir("pyverilator_ipstitched_")
    sim = PyVerilator.build(
        all_verilog_files,
        verilog_path=all_verilog_dirs,
        build_dir=build_dir,
        trace_depth=get_rtlsim_trace_depth(),
        top_module_name=top_module_name,
        auto_eval=False,
    )
    return sim
def prepare_rtlsim(self):
    """Build a Verilator emulation library for this node's generated RTL.

    The path of the built library is stored in the ``rtlsim_so`` node
    attribute and the PyVerilator wrapper around it is returned. The node
    attribute ``code_gen_dir_ipgen`` must be set and the top-level Verilog
    file must exist.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")

    # ensure that code is generated
    code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    assert (
        code_gen_dir != ""
    ), """Node attribute "code_gen_dir_ipgen" is not set. Please run HLSSynthIP first."""
    verilog_file = self.get_verilog_top_filename()
    assert os.path.isfile(verilog_file), "Cannot find top-level Verilog file."

    # build the Verilator emu library
    node_name = self.onnx_node.name
    build_dir = make_build_dir("pyverilator_" + node_name + "_")
    hls_verilog_dir = "{}/project_{}/sol1/impl/verilog/".format(
        code_gen_dir, node_name
    )
    sim = PyVerilator.build(
        verilog_file,
        build_dir=build_dir,
        verilog_path=[hls_verilog_dir],
        trace_depth=get_rtlsim_trace_depth(),
    )

    # save generated lib filename in attribute
    self.set_nodeattr("rtlsim_so", sim.lib._name)
    return sim
def test_end2end_ext_weights_build():
    """Build the external-weights example and save its deployment package.

    Loads the "download" checkpoint (or skips), runs a Zynq dataflow build with
    the packaged external-weights folding config, checks that the deployment
    files exist, and copies the deploy folder to the "build" checkpoint,
    replacing any previous copy.
    """
    model_file = get_checkpoint_name("download")
    load_test_checkpoint_or_skip(model_file)
    build_env = get_build_env(build_kind, target_clk_ns)
    folding_config_file = pk.resource_filename(
        "finn.qnn-data", "test_ext_weights/tfc-w1a1-extw.json"
    )
    output_dir = make_build_dir("test_end2end_ext_weights_build")
    requested_outputs = [
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ]
    cfg = build.DataflowBuildConfig(
        output_dir=output_dir,
        folding_config_file=folding_config_file,
        synth_clk_period_ns=target_clk_ns,
        board=build_env["board"],
        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
        generate_outputs=requested_outputs,
    )
    build.build_dataflow_cfg(model_file, cfg)

    deploy_dir = output_dir + "/deploy"
    for rel_path in (
        "/bitfile/finn-accel.bit",
        "/bitfile/finn-accel.hwh",
        "/driver/driver.py",
        "/driver/runtime_weights/idma0.npy",
    ):
        assert os.path.isfile(deploy_dir + rel_path)

    # copytree needs a non-existing destination, so drop any older checkpoint
    checkpoint_dir = get_checkpoint_name("build")
    if os.path.isdir(checkpoint_dir):
        shutil.rmtree(checkpoint_dir)
    shutil.copytree(deploy_dir, checkpoint_dir)
def apply(self, model):
    """Split all fpgadataflow nodes out of the model into a separate ONNX file.

    Nodes whose "backend" attribute equals "fpgadataflow" are moved into a
    deep copy of the model that is saved as ``df_model.onnx`` in a new build
    directory. In the given model they are replaced by a single
    StreamingDataflowPartition node whose ``model`` attribute holds that
    filename. The given model is modified in place and returned together with
    ``False``; it is returned untouched when there are no dataflow nodes.
    """
    # TODO we currently assume that all dataflow nodes are connected to
    # each other, forming a single partition. check the assumption and/or
    # improve this.
    all_nodes = list(model.graph.node)

    def _is_fpgadataflow(node):
        backend = get_by_name(node.attribute, "backend")
        return backend is not None and backend.s.decode("UTF-8") == "fpgadataflow"

    df_nodes = [node for node in all_nodes if _is_fpgadataflow(node)]
    non_df_nodes = [node for node in all_nodes if node not in df_nodes]

    if not df_nodes:
        # no changes if no dataflow nodes are present
        return (model, False)

    # partition the model into two models
    df_model = copy.deepcopy(model)
    non_df_model = model

    # remove all non-dataflow nodes from the dataflow model
    for unwanted in non_df_nodes:
        df_model.graph.node.remove(unwanted)

    # identify the entry and exit points for the dataflow part
    df_graph = df_model.graph
    df_in = df_graph.node[0].input[0]
    df_out = df_graph.node[-1].output[0]
    df_in_vi = df_model.get_tensor_valueinfo(df_in)
    df_out_vi = df_model.get_tensor_valueinfo(df_out)

    # set df graph in/out to be df_in/df_out
    df_graph.input.remove(df_graph.input[0])
    df_graph.input.insert(0, df_in_vi)
    df_graph.output.remove(df_graph.output[0])
    df_graph.output.insert(0, df_out_vi)
    df_model_filename = make_build_dir("dataflow_partition_") + "/df_model.onnx"
    df_model.save(df_model_filename)

    # remove all dataflow nodes from the non-dataflow model
    # keep track of where the dataflow part starts
    df_start_ind = all_nodes.index(df_nodes[0])
    for unwanted in df_nodes:
        non_df_model.graph.node.remove(unwanted)

    # create StreamingDataflow node with df_in/df_out io
    df_node = helper.make_node(
        "StreamingDataflowPartition",
        [df_in],
        [df_out],
        # use the model attribute to mark the df model
        model=df_model_filename,
    )
    non_df_model.graph.node.insert(df_start_ind, df_node)
    return (non_df_model, False)
def apply(self, model):
    """Collect the deployment files and copy them to the PYNQ board.

    Records the connection settings as metadata properties, gathers the
    .bit/.hwh files from the ``vivado_pynq_proj`` directory and the driver
    files from ``pynq_driver_dir`` into a new deployment directory, creates
    the target directory on the board via ssh and copies the deployment
    directory there via scp (both through sshpass).

    Returns:
        (model, False)

    Raises:
        Exception: if no .bit or no .hwh file is found in the Vivado PYNQ
            project directory.
    """
    # set metadata properties accordingly to user input specifications
    model.set_metadata_prop("pynq_ip", self.ip)
    model.set_metadata_prop("pynq_port", str(self.port))
    model.set_metadata_prop("pynq_username", self.username)
    model.set_metadata_prop("pynq_password", self.password)
    model.set_metadata_prop("pynq_target_dir", self.target_dir)

    # create directory for deployment files
    deployment_dir = make_build_dir(prefix="pynq_deployment_")
    model.set_metadata_prop("pynq_deployment_dir", deployment_dir)

    # get and copy necessary files
    # .bit and .hwh file
    vivado_pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
    bitfile = None
    hwhfile = None
    for fname in os.listdir(vivado_pynq_proj):
        if fname.endswith(".bit"):
            bitfile = os.path.join(vivado_pynq_proj, fname)
        elif fname.endswith(".hwh"):
            hwhfile = os.path.join(vivado_pynq_proj, fname)
    # fail with a clear message instead of an UnboundLocalError further down
    if bitfile is None or hwhfile is None:
        raise Exception(
            "No .bit and/or .hwh file found in %s" % vivado_pynq_proj
        )
    copy(bitfile, deployment_dir)
    copy(hwhfile, deployment_dir)

    # driver.py and python libraries
    pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
    copy_tree(pynq_driver_dir, deployment_dir)
    model.set_metadata_prop("pynq_deploy_dir", deployment_dir)
    model.set_metadata_prop("exec_mode", "remote_pynq")

    # The commands are passed as argument lists (no local shell), so that
    # passwords or paths containing shell metacharacters are handed over
    # verbatim instead of being re-interpreted by bash.
    remote_login = "{}@{}".format(self.username, self.ip)

    # create target directory on PYNQ board
    mkdir_command = [
        "sshpass",
        "-p",
        str(self.password),
        "ssh",
        remote_login,
        "-p",
        str(self.port),
        "mkdir -p {}".format(self.target_dir),
    ]
    process_compile = subprocess.Popen(mkdir_command, stdout=subprocess.PIPE)
    process_compile.communicate()

    # copy directory to PYNQ board using scp and sshpass
    scp_command = [
        "sshpass",
        "-p",
        str(self.password),
        "scp",
        "-P{}".format(self.port),
        "-r",
        deployment_dir,
        "{}:{}".format(remote_login, self.target_dir),
    ]
    process_compile = subprocess.Popen(scp_command, stdout=subprocess.PIPE)
    process_compile.communicate()
    return (model, False)
def test_end2end_cybsec_mlp_build(QONNX_export):
    """Build the cybersecurity MLP and check the generated files and reports.

    Loads the "export" checkpoint (or skips), runs a Zynq dataflow build,
    asserts that the expected output files exist, checks selected values in
    the cycle and resource estimate reports, and finally copies the deploy
    folder to the "build" checkpoint.
    """
    model_file = get_checkpoint_name("export", QONNX_export)
    load_test_checkpoint_or_skip(model_file)
    build_env = get_build_env(build_kind, target_clk_ns)
    output_dir = make_build_dir(f"test_end2end_cybsec_mlp_build_QONNX-{QONNX_export}")
    cfg = build.DataflowBuildConfig(
        output_dir=output_dir,
        target_fps=1000000,
        synth_clk_period_ns=target_clk_ns,
        board=build_env["board"],
        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
        generate_outputs=[
            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
            build_cfg.DataflowOutputType.BITFILE,
            build_cfg.DataflowOutputType.PYNQ_DRIVER,
            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
        ],
    )
    build.build_dataflow_cfg(model_file, cfg)

    # check the generated files
    assert os.path.isfile(output_dir + "/time_per_step.json")
    assert os.path.isfile(output_dir + "/final_hw_config.json")
    assert os.path.isfile(output_dir + "/driver/driver.py")
    est_cycles_report = output_dir + "/report/estimate_layer_cycles.json"
    assert os.path.isfile(est_cycles_report)
    est_res_report = output_dir + "/report/estimate_layer_resources.json"
    assert os.path.isfile(est_res_report)
    assert os.path.isfile(output_dir + "/report/estimate_network_performance.json")
    assert os.path.isfile(output_dir + "/bitfile/finn-accel.bit")
    assert os.path.isfile(output_dir + "/bitfile/finn-accel.hwh")
    assert os.path.isfile(output_dir + "/report/post_synth_resources.xml")
    assert os.path.isfile(output_dir + "/report/post_route_timing.rpt")

    # examine the report contents
    with open(est_cycles_report, "r") as f:
        est_cycles_dict = json.load(f)
        assert est_cycles_dict["StreamingFCLayer_Batch_0"] == 80
        assert est_cycles_dict["StreamingFCLayer_Batch_1"] == 64
    with open(est_res_report, "r") as f:
        est_res_dict = json.load(f)
        assert est_res_dict["total"]["LUT"] == 11360.0
        assert est_res_dict["total"]["BRAM_18K"] == 36.0

    # shutil.copytree refuses to copy onto an existing directory, so remove a
    # checkpoint left behind by a previous run first (same handling as in
    # test_end2end_ext_weights_build)
    checkpoint_dir = get_checkpoint_name("build", QONNX_export)
    if os.path.isdir(checkpoint_dir):
        shutil.rmtree(checkpoint_dir)
    shutil.copytree(output_dir + "/deploy", checkpoint_dir)
def apply(self, model):
    """Run out-of-context synthesis on the stitched IP of the model.

    All ``.v`` files listed in ``all_verilog_srcs.txt`` of the stitched-IP
    project are copied into a new build directory, ``out_of_context_synth``
    is called on it, and its result is stored (as a string) in the
    ``res_total_ooc_synth`` metadata property.

    Returns:
        (model, False)
    """

    def file_to_basename(x):
        return os.path.basename(os.path.realpath(x))

    vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    assert vivado_stitch_proj_dir is not None, "Need stitched IP to run."
    top_module_name = file_to_basename(model.get_metadata_prop("wrapper_filename"))
    # Drop only the ".v" extension. str.strip(".v") treats its argument as a
    # character set and would also eat leading/trailing 'v' and '.' characters
    # of the module name itself (e.g. "vivado_wrapper.v" -> "ivado_wrapper").
    if top_module_name.endswith(".v"):
        top_module_name = top_module_name[: -len(".v")]
    build_dir = make_build_dir("synth_out_of_context_")
    with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
        all_verilog_srcs = f.read().split()
    for verilog_src in all_verilog_srcs:
        if verilog_src.endswith(".v"):
            copy2(verilog_src, build_dir)
    ret = out_of_context_synth(
        build_dir, top_module_name, self.part, self.clk_name, self.clk_period_ns
    )
    model.set_metadata_prop("res_total_ooc_synth", str(ret))
    return (model, False)
def prepare_rtlsim(self):
    """Build a Verilator emulation library for this node's generated RTL.

    The Verilog files and search paths are obtained from the node itself. The
    path of the built library is stored in the ``rtlsim_so`` node attribute
    and the PyVerilator wrapper around it is returned.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")

    verilog_paths = self.get_all_verilog_paths()
    verilog_files = self.get_all_verilog_filenames()

    # build the Verilator emu library
    build_dir = make_build_dir("pyverilator_" + self.onnx_node.name + "_")
    build_options = dict(
        build_dir=build_dir,
        verilog_path=verilog_paths,
        trace_depth=get_rtlsim_trace_depth(),
        top_module_name=self.get_verilog_top_module_name(),
    )
    sim = PyVerilator.build(verilog_files, **build_options)

    # save generated lib filename in attribute
    self.set_nodeattr("rtlsim_so", sim.lib._name)
    return sim
def _codegen_single_node(node, model, fpgapart, clk):
    """Calls C++ code generation for one node. Resulting code can be used
    to generate a Vivado IP block for the node.

    A new ``code_gen_dir_ipgen`` build directory is created and stored on the
    node when the attribute is empty or does not point to an existing
    directory.

    Raises:
        Exception: if the node's op_type is not in the CustomOp registry.
    """
    op_type = node.op_type
    # Only the registry lookup is guarded: a KeyError raised later, inside the
    # code generation itself, must not be misreported as an unsupported op.
    try:
        # lookup op_type in registry of CustomOps
        op_class = registry.custom_op[op_type]
    except KeyError as err:
        # exception if op_type is not supported
        raise Exception(
            "Custom op_type %s is currently not supported." % op_type
        ) from err
    inst = op_class(node)
    # get the path of the code generation directory
    code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
    # ensure that there is a directory
    if code_gen_dir == "" or not os.path.isdir(code_gen_dir):
        code_gen_dir = make_build_dir(
            prefix="code_gen_ipgen_" + str(node.op_type) + "_"
        )
        inst.set_nodeattr("code_gen_dir_ipgen", code_gen_dir)
    # ensure that there is generated code inside the dir
    inst.code_generation_ipgen(model, fpgapart, clk)
def _codegen_single_node(node, model):
    """Calls C++ code generation for one node. Resulting code can be used
    to simulate node using cppsim.

    A new ``code_gen_dir_cppsim`` build directory is created and stored on the
    node when the attribute is empty or does not point to an existing
    directory.

    Raises:
        Exception: if the CustomOp lookup for the node fails with a KeyError.
    """
    op_type = node.op_type
    # Only the registry lookup is guarded: a KeyError raised later, inside the
    # code generation itself, must not be misreported as an unsupported op.
    try:
        # lookup op_type in registry of CustomOps
        inst = registry.getCustomOp(node)
    except KeyError as err:
        # exception if op_type is not supported
        raise Exception(
            "Custom op_type %s is currently not supported." % op_type
        ) from err
    # get the path of the code generation directory
    code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim")
    # ensure that there is a directory
    if code_gen_dir == "" or not os.path.isdir(code_gen_dir):
        code_gen_dir = make_build_dir(
            prefix="code_gen_cppsim_" + str(node.name) + "_"
        )
        inst.set_nodeattr("code_gen_dir_cppsim", code_gen_dir)
    # ensure that there is generated code inside the dir
    inst.code_generation_cppsim(model)
def pyverilate_stitched_ip(model):
    """Given a model with stitched IP, return a PyVerilator sim object.

    The directories of all sources listed in ``all_verilog_srcs.txt`` of the
    stitched-IP project are used as the Verilog search path, and the file named
    by the ``wrapper_filename`` metadata property is built.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")

    stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    with open(stitch_proj_dir + "/all_verilog_srcs.txt", "r") as srcs_file:
        verilog_srcs = srcs_file.read().split()

    # one search-path entry per listed source file (symlinks resolved)
    verilog_dirs = [os.path.dirname(os.path.realpath(src)) for src in verilog_srcs]
    top_verilog = model.get_metadata_prop("wrapper_filename")
    build_dir = make_build_dir("pyverilator_ipstitched_")
    return PyVerilator.build(
        top_verilog,
        verilog_path=verilog_dirs,
        build_dir=build_dir,
        trace_depth=get_rtlsim_trace_depth(),
    )
def apply(self, model):
    """Generate the PYNQ driver folder for a partitioned dataflow model.

    Creates a new driver directory (recorded in the ``pynq_driver_dir``
    metadata property) containing the base driver, a ``driver.py`` filled in
    from the driver template with the input/output datatypes, shapes and DMA
    names of the model, ``validate.py``, copies of the packages that hold the
    data packing and datatype modules, and a ``runtime_weights`` folder with
    external-weight ``.npy`` files and runtime-writable weight ``.dat`` files.
    Every top-level node must be a StreamingDataflowPartition.

    Returns:
        (model, False)
    """
    # create a temporary folder for the generated driver
    pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

    # create the base FINN driver -- same for all accels
    driver_base_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/driver_base.py"
    )
    shutil.copy(driver_base_template, pynq_driver_dir + "/driver_base.py")

    # extract input-output shapes from the graph
    # TODO convert this to an analysis pass?
    idt, idma_names = [], []
    ishape_normal, ishape_folded, ishape_packed = [], [], []
    for graph_in in model.graph.input:
        in_name = graph_in.name
        # get inp tensor properties
        in_dt = model.get_tensor_datatype(in_name)
        in_shape_normal = tuple(model.get_tensor_shape(in_name))
        # go down into dataflow partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        in_sdp = model.find_consumer(in_name)
        assert (
            in_sdp.op_type == "StreamingDataflowPartition"
        ), """ Ensure CreateDataflowPartition called before driver creation."""
        first_df_model = ModelWrapper(getCustomOp(in_sdp).get_nodeattr("model"))
        assert (
            first_df_model.graph.node[0].op_type == "IODMA"
        ), "First partition must hold input IODMA"
        # the folded shape comes from the node fed by the input partition
        next_sdp = model.find_direct_successors(in_sdp)[0]
        next_input_num = list(next_sdp.input).index(in_sdp.output[0])
        next_df_model = ModelWrapper(getCustomOp(next_sdp).get_nodeattr("model"))
        first_node = next_df_model.find_consumer(
            next_df_model.graph.input[next_input_num].name
        )
        in_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape())
        # generate dummy folded i/o tensors and their packed versions
        in_dummy_folded = gen_finn_dt_tensor(in_dt, in_shape_folded)
        in_dummy_packed = dpk.finnpy_to_packed_bytearray(in_dummy_folded, in_dt)
        in_shape_packed = in_dummy_packed.shape
        # append all input tensor info to relevant lists
        idt.append("DataType['%s']" % in_dt.name)
        ishape_normal.append(in_shape_normal)
        ishape_folded.append(in_shape_folded)
        ishape_packed.append(in_shape_packed)
        idma_names.append(getCustomOp(in_sdp).get_nodeattr("instance_name"))

    odt, odma_names = [], []
    oshape_normal, oshape_folded, oshape_packed = [], [], []
    for graph_out in model.graph.output:
        out_name = graph_out.name
        # get outp tensor properties
        out_dt = model.get_tensor_datatype(out_name)
        out_shape_normal = tuple(model.get_tensor_shape(out_name))
        # go down into IODMA partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        out_sdp = model.find_producer(out_name)
        assert (
            out_sdp.op_type == "StreamingDataflowPartition"
        ), """ Ensure CreateDataflowPartition called before driver creation."""
        df_model = ModelWrapper(getCustomOp(out_sdp).get_nodeattr("model"))
        assert (
            df_model.graph.node[-1].op_type == "IODMA"
        ), "Partition must hold output IODMA"
        # the folded shape comes from the node feeding the output partition
        prev_sdp = model.find_direct_predecessors(out_sdp)[0]
        prev_output_num = list(prev_sdp.output).index(out_sdp.input[0])
        prev_df_model = ModelWrapper(getCustomOp(prev_sdp).get_nodeattr("model"))
        last_node = prev_df_model.find_producer(
            prev_df_model.graph.output[prev_output_num].name
        )
        out_shape_folded = tuple(getCustomOp(last_node).get_folded_output_shape())
        out_dummy_folded = gen_finn_dt_tensor(out_dt, out_shape_folded)
        out_dummy_packed = dpk.finnpy_to_packed_bytearray(out_dummy_folded, out_dt)
        out_shape_packed = out_dummy_packed.shape
        # append all output tensor info to relevant lists
        odt.append("DataType['%s']" % out_dt.name)
        oshape_normal.append(out_shape_normal)
        oshape_folded.append(out_shape_folded)
        oshape_packed.append(out_shape_packed)
        odma_names.append(getCustomOp(out_sdp).get_nodeattr("instance_name"))

    # generate external weights npy files
    weights_dir = pynq_driver_dir + "/runtime_weights"
    os.makedirs(weights_dir)
    idma_idx = 0
    ext_weight_dma_cnt = 0
    for node in model.graph.node:
        assert (
            node.op_type == "StreamingDataflowPartition"
        ), "CreateDataflowPartition needs to be applied before driver generation"
        if len(node.input) > 0:
            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])
        else:
            producer = None
            init_tensor = None
        if producer is not None:
            continue
        # no producer: input dma?
        sdp_inst = getCustomOp(node)
        idma_name = sdp_inst.get_nodeattr("instance_name")
        df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
        assert df_model.graph.node[0].op_type == "IODMA"
        iodma_node = getCustomOp(df_model.graph.node[0])
        if iodma_node.get_nodeattr("burstMode") == "wrap":
            # input weights dma?
            weight_tensor_name = iodma_node.onnx_node.input[0]
            init_tensor = df_model.get_initializer(weight_tensor_name)
            ext_weight_dma_cnt += 1
            w_dtype = df_model.get_tensor_datatype(weight_tensor_name)
            init_external_tensor = to_external_tensor(init_tensor, w_dtype)
            np.save(weights_dir + "/" + idma_name + ".npy", init_external_tensor)
        idma_idx += 1

    # fill in the driver template; placeholders are substituted in this order
    substitutions = (
        ("$PLATFORM$", self.platform),
        ("$INPUT_FINN_DATATYPE$", str(idt).replace('"', "")),
        ("$INPUT_SHAPE_NORMAL$", str(ishape_normal)),
        ("$INPUT_SHAPE_FOLDED$", str(ishape_folded)),
        ("$INPUT_SHAPE_PACKED$", str(ishape_packed)),
        ("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', "")),
        ("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal)),
        ("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded)),
        ("$OUTPUT_SHAPE_PACKED$", str(oshape_packed)),
        ("$INPUT_DMA_NAME$", "%s" % str(idma_names)),
        ("$OUTPUT_DMA_NAME$", "%s" % str(odma_names)),
        ("$NUM_INPUTS$", str(len(idma_names))),
        ("$NUM_OUTPUTS$", str(len(odma_names))),
        ("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt)),
    )
    driver = template_driver.pynq_driver_template
    for placeholder, value in substitutions:
        driver = driver.replace(placeholder, value)
    with open(pynq_driver_dir + "/driver.py", "w") as f:
        f.write(driver)

    # add validate.py to run full top-1 test (only for suitable networks)
    validate_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/validate.py"
    )
    shutil.copy(validate_template, pynq_driver_dir + "/validate.py")

    # copy all the dependencies into the driver folder
    # driver imports utils/data_packing and core/datatype
    # both of which are in finn-base
    # e.g. /workspace/finn-base/src/finn/util/data_packing.py
    #   -> /workspace/finn-base/src/finn/util
    dpk_root = dpk.__file__.replace("data_packing.py", "")
    # e.g. /workspace/finn-base/src/finn/core/datatype.py
    #   -> /workspace/finn-base/src/finn/core
    dtp_root = dtp.__file__.replace("datatype.py", "")
    shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
    shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

    # generate weight files for runtime-writable layers
    for sdp_ind, sdp_node in enumerate(model.graph.node):
        assert sdp_node.op_type == "StreamingDataflowPartition"
        # get dataflow model
        dataflow_model = ModelWrapper(getCustomOp(sdp_node).get_nodeattr("model"))
        rt_layer_ind = 0
        for node in dataflow_model.graph.node:
            if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                node_inst = getCustomOp(node)
                if node_inst.get_nodeattr("runtime_writeable_weights") == 1:
                    fcl_w = dataflow_model.get_initializer(node.input[1])
                    w_filename = weights_dir + "/%d_%d_%s.dat" % (
                        sdp_ind,
                        rt_layer_ind,
                        node.name,
                    )
                    node_inst.make_weight_file(fcl_w, "decoupled_runtime", w_filename)
                    rt_layer_ind += 1
            elif node.op_type == "StreamingDataflowPartition":
                warnings.warn(
                    """Nested StreamingDataflowPartition are not supported """
                )
    return (model, False)
def apply(self, model):
    """Link the kernel .xo files of all partitions into an xclbin with Vitis.

    Writes a v++ connectivity config, a report-generation tcl script and two
    shell scripts into a new link directory (recorded in the
    ``vitis_link_proj`` metadata property), runs v++ and then Vivado, and
    records the resulting ``a.xclbin`` and the synthesis report filename in
    the ``bitfile`` and ``vivado_synth_rpt`` metadata properties. Every
    top-level node must be a StreamingDataflowPartition.

    Returns:
        (model, False)
    """
    _check_vitis_envvars()
    # create a config file and empty list of xo files
    config = ["[connectivity]"]
    object_files = []
    idma_idx = 0
    odma_idx = 0
    instance_names = {}
    for node in model.graph.node:
        assert node.op_type == "StreamingDataflowPartition", "Invalid link graph"
        kernel_model = ModelWrapper(getCustomOp(node).get_nodeattr("model"))
        object_files.append(kernel_model.get_metadata_prop("vitis_xo"))
        # gather info on connectivity
        # assume each node connected to outputs/inputs is DMA:
        # has axis, aximm and axilite
        # everything else is axis-only
        # assume only one connection from each ip to the next
        # all aximm allocated to DDR[0]
        # all kernels allocated to SLR0
        producer = model.find_producer(node.input[0])
        consumer = model.find_consumers(node.output[0])
        # define kernel instances
        # name kernels connected to graph inputs as idmaxx
        # name kernels connected to graph outputs as odmaxx
        if producer is None:
            inst_name = "idma" + str(idma_idx)
            idma_idx += 1
        elif consumer is None:
            inst_name = "odma" + str(odma_idx)
            odma_idx += 1
        else:
            inst_name = node.name
        instance_names[node.name] = inst_name
        config.append("nk=%s:1:%s" % (node.name, inst_name))
        # assign SLRs
        config.append("slr=%s:SLR0" % inst_name)
        # assign memory banks
        if producer is None or consumer is None:
            config.append("sp=%s.m_axi_gmem0:DDR[%d]" % (inst_name, 0))
        # connect streams
        if producer is not None:
            for in_idx, in_tensor in enumerate(node.input):
                src_node = model.find_producer(in_tensor)
                if src_node is None:
                    continue
                out_idx = list(src_node.output).index(in_tensor)
                config.append(
                    "stream_connect=%s.m_axis_%d:%s.s_axis_%d"
                    % (instance_names[src_node.name], out_idx, inst_name, in_idx)
                )

    # create a temporary folder for the project
    link_dir = make_build_dir(prefix="vitis_link_proj_")
    model.set_metadata_prop("vitis_link_proj", link_dir)

    # add Vivado physopt directives if desired
    if self.strategy == VitisOptStrategy.PERFORMANCE_BEST:
        config.extend(
            [
                "[vivado]",
                "prop=run.impl_1.STEPS.OPT_DESIGN.ARGS.DIRECTIVE=ExploreWithRemap",
                "prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=Explore",
                "prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.IS_ENABLED=true",
                "prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=Explore",
                "prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=Explore",
            ]
        )

    config = "\n".join(config) + "\n"
    with open(link_dir + "/config.txt", "w") as f:
        f.write(config)

    # create tcl script to generate resource report in XML format
    gen_rep_xml = templates.vitis_gen_xml_report_tcl_template.replace(
        "$VITIS_PROJ_PATH$", link_dir
    )
    with open(link_dir + "/gen_report_xml.tcl", "w") as f:
        f.write(gen_rep_xml)

    debug_commands = []
    if self.enable_debug:
        debug_commands = [
            "--dk chipscope:%s" % inst for inst in instance_names.values()
        ]

    # create a shell script and call Vitis
    script = link_dir + "/run_vitis_link.sh"
    working_dir = os.environ["PWD"]
    vitis_command = (
        "v++ -t hw --platform %s --link %s"
        " --kernel_frequency %d --config config.txt --optimize %s"
        " --save-temps -R2 %s\n"
        % (
            self.platform,
            " ".join(object_files),
            self.f_mhz,
            self.strategy.value,
            " ".join(debug_commands),
        )
    )
    with open(script, "w") as f:
        f.write("#!/bin/bash \n")
        f.write("cd {}\n".format(link_dir))
        f.write(vitis_command)
        f.write("cd {}\n".format(working_dir))
    process_compile = subprocess.Popen(["bash", script], stdout=subprocess.PIPE)
    process_compile.communicate()

    # TODO rename xclbin appropriately here?
    xclbin = link_dir + "/a.xclbin"
    assert os.path.isfile(xclbin), (
        "Vitis .xclbin file not created, check logs under %s" % link_dir
    )
    model.set_metadata_prop("bitfile", xclbin)

    # run Vivado to gen xml report
    gen_rep_xml_sh = link_dir + "/gen_report_xml.sh"
    working_dir = os.environ["PWD"]
    with open(gen_rep_xml_sh, "w") as f:
        f.write("#!/bin/bash \n")
        f.write("cd {}\n".format(link_dir))
        f.write("vivado -mode batch -source %s\n" % (link_dir + "/gen_report_xml.tcl"))
        f.write("cd {}\n".format(working_dir))
    process_genxml = subprocess.Popen(
        ["bash", gen_rep_xml_sh], stdout=subprocess.PIPE
    )
    process_genxml.communicate()

    # filename for the synth utilization report
    model.set_metadata_prop("vivado_synth_rpt", link_dir + "/synth_report.xml")
    return (model, False)
def apply(self, model):
    """Apply a floorplan to the model and assign partition ids to its nodes.

    Uses the user-specified floorplan if one was given, otherwise generates a
    default one and saves it as ``floorplan.json`` in a new build directory.
    Data width converters and FIFOs without an SLR assignment inherit the SLR
    of a neighbouring node. Each node then receives a ``partition_id``, and
    the updated floorplan is written to the file named by the
    ``floorplan_json`` metadata property.

    Returns:
        (model, False)
    """
    # read in a user-specified floorplan or generate a default one
    if self.user_floorplan is not None:
        model.set_metadata_prop("floorplan_json", self.user_floorplan)
        model = model.transform(ApplyConfig(self.user_floorplan))
    else:
        floorplan = model.analysis(floorplan_params)
        json_file = make_build_dir(prefix="vitis_floorplan_") + "/floorplan.json"
        model.set_metadata_prop("floorplan_json", json_file)
        with open(json_file, "w") as f:
            json.dump(floorplan, f, indent=4)

    # perform DWC and FIFO specific adjustments
    unassigned_nodes = 0
    for node in model.graph.node:
        node_inst = getCustomOp(node)
        if node_inst.get_nodeattr("slr") != -1:
            # if we have SLR assignment already. use that
            continue
        unassigned_nodes += 1
        if node.op_type == "StreamingDataWidthConverter_Batch":
            # optimize for possible SLR crossing
            in_width = node_inst.get_nodeattr("inWidth")
            out_width = node_inst.get_nodeattr("outWidth")
            # find neighbour with narrowest bus
            if in_width > out_width:
                narrow_neighbour = model.find_consumer(node.output[0])
            else:
                narrow_neighbour = model.find_producer(node.input[0])
            neighbour_slr = getCustomOp(narrow_neighbour).get_nodeattr("slr")
            node_inst.set_nodeattr("slr", neighbour_slr)
        elif node.op_type == "StreamingFIFO":
            # a FIFO inherits the SLR of its producer
            srcnode = model.find_producer(node.input[0])
            node_inst.set_nodeattr("slr", getCustomOp(srcnode).get_nodeattr("slr"))

    if unassigned_nodes > 0:
        warnings.warn(
            str(unassigned_nodes)
            + " nodes have no entry in the provided floorplan "
            + "and no default value was set"
        )

    # partition id generation
    partition_cnt = 0

    # Assign IODMAs to their own partitions
    all_nodes = list(model.graph.node)
    df_nodes = [
        n for n in all_nodes if get_by_name(n.attribute, "backend") is not None
    ]
    dma_nodes = [n for n in df_nodes if n.op_type == "IODMA"]
    non_dma_nodes = [n for n in df_nodes if n not in dma_nodes]
    dyn_tlastmarker_nodes = [
        n
        for n in non_dma_nodes
        if n.op_type == "TLastMarker"
        and getCustomOp(n).get_nodeattr("DynIters") == "true"
    ]
    non_dma_nodes = [n for n in non_dma_nodes if n not in dyn_tlastmarker_nodes]

    # every IODMA and every dynamic TLastMarker gets a partition of its own
    for node in dma_nodes + dyn_tlastmarker_nodes:
        getCustomOp(node).set_nodeattr("partition_id", partition_cnt)
        partition_cnt += 1

    for node in non_dma_nodes:
        pre_node = model.find_producer(node.input[0])
        node_inst = getCustomOp(node)
        if pre_node not in non_dma_nodes:
            # input node
            node_inst.set_nodeattr("partition_id", partition_cnt)
            partition_cnt += 1
            continue

        uses_external_weights = (
            node.op_type == "StreamingFCLayer_Batch"
            and node_inst.get_nodeattr("mem_mode") == "external"
        )
        if uses_external_weights:
            # only the producer of the first input is considered
            pre_nodes = [pre_node]
        else:
            pre_nodes = model.find_direct_predecessors(node)

        # join the partition of the first predecessor on the same SLR
        node_slr = node_inst.get_nodeattr("slr")
        same_slr_pre = next(
            (
                pre_inst
                for pre_inst in map(getCustomOp, pre_nodes)
                if node_slr == pre_inst.get_nodeattr("slr")
            ),
            None,
        )
        if same_slr_pre is not None:
            node_inst.set_nodeattr(
                "partition_id", same_slr_pre.get_nodeattr("partition_id")
            )
        else:
            # no matching, new partition
            node_inst.set_nodeattr("partition_id", partition_cnt)
            partition_cnt += 1

    # save the updated floorplan
    floorplan = model.analysis(floorplan_params)
    with open(model.get_metadata_prop("floorplan_json"), "w") as f:
        json.dump(floorplan, f, indent=4)
    return (model, False)
def apply(self, model):
    """Generate and launch the PYNQ shell project for a stitched-IP model.

    Reads the ``vivado_stitch_proj`` and ``vivado_stitch_vlnv`` metadata
    properties, writes ``ip_config.tcl`` plus the ``make_project.sh`` and
    ``synth_project.sh`` scripts into a fresh build directory, stores that
    directory and the synthesis report path in the ``vivado_pynq_proj`` and
    ``vivado_synth_rpt`` metadata properties, and runs ``make_project.sh``.

    Returns ``(model, False)``.

    Raises ``KeyError`` if ``PYNQSHELL_PATH`` or ``PWD`` is missing from the
    environment, ``Exception`` if the shell repo or the stitched IP cannot be
    found, and ``AssertionError`` if a node has no valid ``ip_path``.
    """
    shell_root = os.environ["PYNQSHELL_PATH"]
    if not os.path.isdir(shell_root):
        raise Exception("Ensure the PYNQ-HelloWorld utility repo is cloned.")
    stitch_dir = model.get_metadata_prop("vivado_stitch_proj")
    if stitch_dir is None or not os.path.isdir(stitch_dir):
        raise Exception(
            "No stitched IPI design found, apply CreateStitchedIP first."
        )
    stitch_vlnv = model.get_metadata_prop("vivado_stitch_vlnv")
    if stitch_vlnv is None:
        raise Exception(
            "No vlnv for stitched IP found, apply CreateStitchedIP first."
        )

    # "list" comes first so the joined string reads "[list dir1 dir2 ...]"
    repo_dirs = ["list"]
    for graph_node in model.graph.node:
        path_attr = get_by_name(graph_node.attribute, "ip_path")
        assert path_attr is not None, (
            'Node attribute "ip_path" is empty. '
            "Please run transformation HLSSynth_ipgen first."
        )
        node_ip_dir = path_attr.s.decode("UTF-8")
        assert os.path.isdir(node_ip_dir), (
            "The directory that should contain "
            "the generated ip blocks doesn't exist."
        )
        repo_dirs.append(node_ip_dir)
    repo_dirs.append(stitch_dir + "/ip")
    repo_dirs_tcl = "[" + " ".join(repo_dirs) + "]"

    # stream widths come from the custom ops at the graph boundaries
    in_tensor = model.graph.input[0].name
    out_tensor = model.graph.output[0].name
    head_op = getCustomOp(model.find_consumer(in_tensor))
    tail_op = getCustomOp(model.find_producer(out_tensor))
    in_bits = head_op.get_instream_width()
    out_bits = tail_op.get_outstream_width()
    # pad both widths up to whole bytes
    in_bits_padded = roundup_to_integer_multiple(in_bits, 8)
    out_bits_padded = roundup_to_integer_multiple(out_bits, 8)
    assert in_bits_padded % 8 == 0, "Padded input bits are not a multiple of 8."
    assert out_bits_padded % 8 == 0, "Padded output bits are not a multiple of 8."
    in_bytes = in_bits_padded / 8
    out_bytes = out_bits_padded / 8

    ip_cache = os.getenv("VIVADO_IP_CACHE", default="")

    # fresh build directory for the project, recorded on the model
    proj_dir = make_build_dir(prefix="vivado_pynq_proj_")
    model.set_metadata_prop("vivado_pynq_proj", proj_dir)
    # location of the synthesis utilization report
    synth_rpt = proj_dir + "/synth_report.xml"
    model.set_metadata_prop("vivado_synth_rpt", synth_rpt)

    tcl_fields = (
        proj_dir,
        repo_dirs_tcl,
        proj_dir,
        synth_rpt,
        stitch_vlnv,
        in_bytes,
        out_bytes,
        "in0_V_V_0",  # input stream interface name
        "out_r_0",  # output stream interface name
        "ap_clk_0",  # clock name
        "ap_rst_n_0",  # active-low reset name
        "s_axi_control_0",  # AXI lite interface name
        ip_cache,
        100.0,  # fclk in MHz; TODO get from Transformation arg or metadata_prop
    )
    tcl_text = templates.ip_config_tcl_template % tcl_fields
    tcl_cfg_path = proj_dir + "/ip_config.tcl"
    with open(tcl_cfg_path, "w") as tcl_file:
        tcl_file.write(tcl_text)

    # one script per makefile target; only the project-creation one runs here,
    # synthesis is triggered by a separate transformation
    make_script = proj_dir + "/make_project.sh"
    launch_dir = os.environ["PWD"]
    synth_script = proj_dir + "/synth_project.sh"
    for script_path, make_target in (
        (make_script, "block_design"),
        (synth_script, "bitstream"),
    ):
        with open(script_path, "w") as script:
            script.write(
                templates.call_pynqshell_makefile_template
                % (shell_root, self.platform, tcl_cfg_path, make_target, launch_dir)
            )

    runner = subprocess.Popen(["bash", make_script], stdout=subprocess.PIPE)
    runner.communicate()
    return (model, False)
def apply(self, model):
    """Stitch all node IPs into one Vivado block design and package it as an IP.

    For every node in the graph the node's IP directory is collected, its
    ``code_generation_ipi()`` commands are queued, and Tcl commands wiring it
    to its producer(s) or to the external AXI stream ports are generated. The
    complete Tcl script is written to ``make_project.tcl`` in a fresh build
    directory and executed with Vivado in batch mode.

    Metadata properties set on the model: ``vivado_stitch_proj``, ``clk_ns``,
    ``wrapper_filename``, ``vivado_stitch_vlnv`` and ``vivado_stitch_ifnames``.

    Returns ``(model, False)``.

    Raises ``AssertionError`` when a node is not a FINN fpgadataflow node, has
    no existing IP directory, or is a first/last TLastMarker/IODMA node whose
    direction attribute does not match its position. Raises ``KeyError`` if
    ``PWD`` is missing from the environment.
    """
    # ensure non-relative readmemh .dat files
    model = model.transform(ReplaceVerilogRelPaths())
    # "list" comes first so the joined string reads "list dir1 dir2 ..."
    ip_dirs = ["list"]
    # add RTL streamer IP
    ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
    # ensure that all nodes are fpgadataflow, and that IPs are generated
    for node in model.graph.node:
        assert is_finn_op(node.domain), "Found non-FINN node"
        backend_attribute = get_by_name(node.attribute, "backend")
        assert backend_attribute is not None, "Backend node attribute is not set."
        backend_value = backend_attribute.s.decode("UTF-8")
        assert (
            backend_value == "fpgadataflow"
        ), """Backend node attribute is not set to "fpgadataflow"."""
        node_inst = getCustomOp(node)
        ip_dir_value = node_inst.get_nodeattr("ip_path")
        assert os.path.isdir(ip_dir_value), "IP generation directory doesn't exist."
        ip_dirs += [ip_dir_value]
        self.create_cmds += node_inst.code_generation_ipi()
        my_producer = model.find_producer(node.input[0])
        self.connect_clk_rst(node)
        self.connect_axi(node)
        if my_producer is None:
            # first node in graph
            self.connect_s_axis_external(node)
            if node.op_type == "TLastMarker":
                # this is the input-side check, so the message says "Input"
                assert (
                    node_inst.get_nodeattr("Direction") == "in"
                ), """Input TLastMarker incorrect direction"""
            elif node.op_type == "IODMA" and len(model.graph.node) != 1:
                # don't apply this check for a 1-node partition
                assert (
                    node_inst.get_nodeattr("direction") == "in"
                ), """Input DMA incorrect direction"""
        else:
            # intermediate node
            # wire up input(s) to previous node output(s)
            # foreach input
            #     find producer
            #     find index of producer output connected to our target input
            #     get names of hdl interfaces for input and producer output
            #     issue a TCL directive to connect input to output
            #     if FC layer with mode "decoupled", add a streamer on input 1
            for i in range(len(node.input)):
                producer = model.find_producer(node.input[i])
                if producer is None:
                    # inputs without a producer are not wired here
                    continue
                j = list(producer.output).index(node.input[i])
                src_intf_name = getCustomOp(
                    producer
                ).get_verilog_top_module_intf_names()["m_axis"][j]
                dst_intf_name = node_inst.get_verilog_top_module_intf_names()[
                    "s_axis"
                ][i]
                self.connect_cmds.append(
                    "connect_bd_intf_net [get_bd_intf_pins %s/%s] "
                    "[get_bd_intf_pins %s/%s]"
                    % (producer.name, src_intf_name, node.name, dst_intf_name)
                )
        if model.find_consumers(node.output[0]) is None:
            # last node in graph
            self.connect_m_axis_external(node)
            if node.op_type == "TLastMarker":
                assert (
                    node_inst.get_nodeattr("Direction") == "out"
                ), """Output TLastMarker incorrect direction"""
            elif node.op_type == "IODMA" and len(model.graph.node) != 1:
                assert (
                    node_inst.get_nodeattr("direction") == "out"
                ), """Output DMA incorrect direction"""

    # create a temporary folder for the project
    prjname = "finn_vivado_stitch_proj"
    vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_")
    model.set_metadata_prop("vivado_stitch_proj", vivado_stitch_proj_dir)
    # start building the tcl script
    tcl = []
    # create vivado project
    tcl.append(
        "create_project %s %s -part %s"
        % (prjname, vivado_stitch_proj_dir, self.fpgapart)
    )
    # add all the generated IP dirs to ip_repo_paths
    ip_dirs_str = " ".join(ip_dirs)
    tcl.append("set_property ip_repo_paths [%s] [current_project]" % ip_dirs_str)
    tcl.append("update_ip_catalog")
    # create block design and instantiate all layers
    block_name = self.ip_name
    tcl.append('create_bd_design "%s"' % block_name)
    tcl.extend(self.create_cmds)
    tcl.extend(self.connect_cmds)
    # clock period in ns -> frequency in MHz -> frequency in Hz
    fclk_mhz = 1 / (self.clk_ns * 0.001)
    fclk_hz = fclk_mhz * 1000000
    model.set_metadata_prop("clk_ns", str(self.clk_ns))
    tcl.append("set_property CONFIG.FREQ_HZ %f [get_bd_ports /ap_clk]" % fclk_hz)
    tcl.append("regenerate_bd_layout")
    tcl.append("validate_bd_design")
    tcl.append("save_bd_design")
    # create wrapper hdl (for rtlsim later on)
    bd_base = "%s/%s.srcs/sources_1/bd/%s" % (
        vivado_stitch_proj_dir,
        prjname,
        block_name,
    )
    bd_filename = "%s/%s.bd" % (bd_base, block_name)
    tcl.append("make_wrapper -files [get_files %s] -top" % bd_filename)
    wrapper_filename = "%s/hdl/%s_wrapper.v" % (bd_base, block_name)
    tcl.append("add_files -norecurse %s" % wrapper_filename)
    model.set_metadata_prop("wrapper_filename", wrapper_filename)
    # synthesize to DCP and export stub, DCP and constraints
    if self.vitis:
        tcl.append(
            "set_property SYNTH_CHECKPOINT_MODE Hierarchical [ get_files %s ]"
            % bd_filename
        )
        tcl.append(
            "set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} "
            "-value {-mode out_of_context} -objects [get_runs synth_1]"
        )
        num_workers = get_num_default_workers()
        assert num_workers >= 0, "Number of workers must be nonnegative."
        if num_workers == 0:
            # zero means "use every available CPU"
            num_workers = mp.cpu_count()
        tcl.append("launch_runs synth_1 -jobs %s" % str(num_workers))
        tcl.append("wait_on_run [get_runs synth_1]")
        tcl.append("open_run synth_1 -name synth_1")
        tcl.append("write_verilog -force -mode synth_stub %s.v" % block_name)
        tcl.append("write_checkpoint %s.dcp" % block_name)
        tcl.append("write_xdc %s.xdc" % block_name)
        tcl.append("report_utilization -file %s_partition_util.rpt" % block_name)
    # export block design itself as an IP core
    block_vendor = "xilinx_finn"
    block_library = "finn"
    block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name)
    model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv)
    model.set_metadata_prop("vivado_stitch_ifnames", str(self.intf_names))
    tcl.append(
        (
            "ipx::package_project -root_dir %s/ip -vendor %s "
            "-library %s -taxonomy /UserIP -module %s -import_files"
        )
        % (vivado_stitch_proj_dir, block_vendor, block_library, block_name)
    )
    tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
    tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
    # if targeting Vitis, add some properties to the IP
    if self.vitis:
        tcl.append(
            "ipx::remove_bus_parameter FREQ_HZ "
            "[ipx::get_bus_interfaces CLK.AP_CLK -of_objects [ipx::current_core]]"
        )
        # replace source code with dcp
        tcl.append(
            "set_property sdx_kernel true [ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property sdx_kernel_type rtl [ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property supported_families { } [ipx::find_open_core %s]"
            % block_vlnv
        )
        tcl.append(
            "set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} "
            "[ipx::find_open_core %s]" % block_vlnv
        )
        tcl.append(
            "set_property auto_family_support_level level_2 "
            "[ipx::find_open_core %s]" % block_vlnv
        )
        # remove all files from synthesis and sim groups
        # we'll replace with DCP, stub, and xdc
        tcl.append(
            "ipx::remove_all_file "
            "[ipx::get_file_groups xilinx_anylanguagebehavioralsimulation]"
        )
        tcl.append(
            "ipx::remove_all_file "
            "[ipx::get_file_groups xilinx_anylanguagesynthesis]"
        )
        tcl.append(
            "ipx::remove_file_group "
            "xilinx_anylanguagebehavioralsimulation [ipx::current_core]"
        )
        tcl.append(
            "ipx::remove_file_group "
            "xilinx_anylanguagesynthesis [ipx::current_core]"
        )
        # remove sim and src folders
        tcl.append("file delete -force %s/ip/sim" % vivado_stitch_proj_dir)
        tcl.append("file delete -force %s/ip/src" % vivado_stitch_proj_dir)
        # copy and add DCP, stub, and xdc
        tcl.append("file mkdir %s/ip/dcp" % vivado_stitch_proj_dir)
        tcl.append("file mkdir %s/ip/impl" % vivado_stitch_proj_dir)
        tcl.append(
            "file copy -force %s.dcp %s/ip/dcp"
            % (block_name, vivado_stitch_proj_dir)
        )
        tcl.append(
            "file copy -force %s.xdc %s/ip/impl"
            % (block_name, vivado_stitch_proj_dir)
        )
        tcl.append("ipx::add_file_group xilinx_implementation [ipx::current_core]")
        tcl.append(
            "ipx::add_file impl/%s.xdc [ipx::get_file_groups xilinx_implementation]"
            % block_name
        )
        tcl.append(
            "set_property used_in [list implementation] "
            "[ipx::get_files impl/%s.xdc "
            "-of_objects [ipx::get_file_groups xilinx_implementation]]" % block_name
        )
        tcl.append(
            "ipx::add_file_group " "xilinx_synthesischeckpoint [ipx::current_core]"
        )
        tcl.append(
            "ipx::add_file dcp/%s.dcp "
            "[ipx::get_file_groups xilinx_synthesischeckpoint]" % block_name
        )
        tcl.append(
            "ipx::add_file_group xilinx_simulationcheckpoint [ipx::current_core]"
        )
        tcl.append(
            "ipx::add_file dcp/%s.dcp "
            "[ipx::get_file_groups xilinx_simulationcheckpoint]" % block_name
        )
    tcl.append("ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv)
    tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv)
    # export list of used Verilog files (for rtlsim later on)
    tcl.append(
        "set all_v_files [get_files -filter {FILE_TYPE == Verilog "
        + "&& USED_IN_SYNTHESIS == 1} ]"
    )
    v_file_list = "%s/all_verilog_srcs.txt" % vivado_stitch_proj_dir
    tcl.append("set fp [open %s w]" % v_file_list)
    # write each verilog filename to all_verilog_srcs.txt
    tcl.append("foreach vf $all_v_files {puts $fp $vf}")
    tcl.append("close $fp")
    # write the project creator tcl script
    tcl_string = "\n".join(tcl) + "\n"
    with open(vivado_stitch_proj_dir + "/make_project.tcl", "w") as f:
        f.write(tcl_string)
    # create a shell script and call Vivado
    make_project_sh = vivado_stitch_proj_dir + "/make_project.sh"
    working_dir = os.environ["PWD"]
    with open(make_project_sh, "w") as f:
        f.write("#!/bin/bash \n")
        f.write("cd {}\n".format(vivado_stitch_proj_dir))
        f.write("vivado -mode batch -source make_project.tcl\n")
        f.write("cd {}\n".format(working_dir))
    bash_command = ["bash", make_project_sh]
    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
    process_compile.communicate()
    return (model, False)
def apply(self, model):
    """Collect the deployment files for a model and copy them to the board.

    Stores the connection settings (``pynq_ip``, ``pynq_port``,
    ``pynq_username``, ``pynq_password``, ``pynq_target_dir``) as metadata
    properties, gathers the bitfile, hardware handoff file and driver
    directory into a fresh deployment directory, writes ``alveo_run.sh`` when
    the ``platform`` metadata property is ``"alveo"``, and then uses ``ssh``
    and ``scp`` to create the target directory remotely and copy the
    deployment directory into it.

    Also sets the ``pynq_deployment_dir``, ``pynq_deploy_dir`` and
    ``exec_mode`` metadata properties.

    Returns ``(model, False)``.

    Raises ``KeyError`` if ``XILINX_XRT`` or ``PLATFORM_REPO_PATHS`` is missing
    from the environment when deploying for the alveo platform.
    """
    # set metadata properties accordingly to user input specifications
    model.set_metadata_prop("pynq_ip", self.ip)
    model.set_metadata_prop("pynq_port", str(self.port))
    model.set_metadata_prop("pynq_username", self.username)
    model.set_metadata_prop("pynq_password", self.password)
    model.set_metadata_prop("pynq_target_dir", self.target_dir)

    # create directory for deployment files
    deployment_dir = make_build_dir(prefix="pynq_deployment_")
    model.set_metadata_prop("pynq_deployment_dir", deployment_dir)

    # get and copy necessary files: .bit and .hwh file
    bitfile = model.get_metadata_prop("bitfile")
    hwh_file = model.get_metadata_prop("hw_handoff")
    for dfile in (bitfile, hwh_file):
        # either property may be unset; only copy what exists
        if dfile is not None:
            copy(dfile, deployment_dir)

    # helper script for Alveo
    platform = model.get_metadata_prop("platform")
    if platform == "alveo":
        alveo_run_sh = templates.alveo_run_sh_template
        fill_dict = {
            "$REMOTE_DEPLOY_DIR$": self.target_dir
            + "/"
            + os.path.basename(deployment_dir),
            "$CONDA_ENV_NAME$": "finn-pynq-alveo",
            "$REMOTE_XRT$": os.environ["XILINX_XRT"],
            "$REMOTE_PLATFORM_REPO_PATHS$": os.environ["PLATFORM_REPO_PATHS"],
            "$BITFILE$": os.path.basename(bitfile),
        }
        for key, value in fill_dict.items():
            alveo_run_sh = alveo_run_sh.replace(key, value)
        alveo_run_sh_path = deployment_dir + "/alveo_run.sh"
        with open(alveo_run_sh_path, "w") as f:
            f.write(alveo_run_sh)

    # driver.py and python libraries
    pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
    copy_tree(pynq_driver_dir, deployment_dir)
    model.set_metadata_prop("pynq_deploy_dir", deployment_dir)
    model.set_metadata_prop("exec_mode", "remote_pynq")

    # Both remote commands are passed as argument lists instead of a string
    # handed to "bash -c", so local paths containing spaces or shell
    # metacharacters are not re-interpreted by a local shell.
    remote_host = "{}@{}".format(self.username, self.ip)

    # create target directory on PYNQ board
    # NOTE: the remote side still runs this command string in its own shell
    mkdir_command = [
        "ssh",
        remote_host,
        "-p",
        str(self.port),
        "mkdir -p {}".format(self.target_dir),
    ]
    process_compile = subprocess.Popen(mkdir_command, stdout=subprocess.PIPE)
    process_compile.communicate()

    # copy directory to PYNQ board using scp
    scp_command = [
        "scp",
        "-P{}".format(self.port),
        "-r",
        deployment_dir,
        "{}:{}".format(remote_host, self.target_dir),
    ]
    process_compile = subprocess.Popen(scp_command, stdout=subprocess.PIPE)
    process_compile.communicate()
    return (model, False)
import onnx.numpy_helper as nph
import torch
from brevitas.export.onnx.generic.manager import BrevitasONNXManager
from pkgutil import get_data
from qonnx.util.cleanup import cleanup as qonnx_cleanup

import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.util.basic import make_build_dir
from finn.util.test import get_test_model_trained

export_onnx_path = make_build_dir("test_brevitas_fc_")


# act bits
@pytest.mark.parametrize("abits", [1, 2])
# weight bits
@pytest.mark.parametrize("wbits", [1, 2])
# network topology / size
@pytest.mark.parametrize("size", ["TFC", "SFC", "LFC"])
# QONNX export
@pytest.mark.parametrize("QONNX_export", [False, True])
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, QONNX_export):
    """Skip the parameter combinations for which no network is available."""
    # the 2-bit weight / 2-bit activation LFC variant is not available
    is_lfc_w2a2 = (size, wbits, abits) == ("LFC", 2, 2)
    if is_lfc_w2a2:
        pytest.skip("No LFC-w2a2 present at the moment")
    # weights wider than activations are not covered either
    if abits < wbits:
        pytest.skip("No wbits > abits cases at the moment")
def test_brevitas_mobilenet():
    """Compare the top-5 output of exported MobileNet-v1 with Brevitas.

    A single image is pushed through the Brevitas preprocessing + MobileNet
    modules and through the merged, exported ONNX model; the top-5 indices
    must match exactly and the top-5 values must be close.
    """
    # load the sample picture, scale its shorter side to 256 pixels (keeping
    # the aspect ratio) and cut out the central 224x224 window
    picture = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    picture = resize_smaller_side(256, picture)
    picture = crop_center(224, picture)
    # numpy copy in (C, H, W) layout with a batch dimension, plus a torch twin
    img_np = np.asarray(picture).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    img_torch = torch.from_numpy(img_np).float()

    # export the preprocessing module
    build_dir = make_build_dir("test_brevitas_mobilenet-v1_")
    preproc_onnx = build_dir + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc([0.485, 0.456, 0.406], 0.226, 3)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(
        preproc_model.graph.input[0].name, DataType.UINT8
    )
    for tidy_cls in (
        InferShapes,
        GiveUniqueNodeNames,
        GiveUniqueParameterTensors,
        GiveReadableTensorNames,
    ):
        preproc_model = preproc_model.transform(tidy_cls())

    # export the network itself
    finn_onnx = build_dir + "/quant_mobilenet_v1_4b_exported.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # reference result from the PyTorch/Brevitas forward pass
    golden_scores = (
        mobilenet.forward(preproc.forward(img_torch)).detach().numpy().flatten()
    )
    # indices of the five largest scores, best first
    expected_top5 = np.flip(np.argsort(golden_scores)[-5:])
    expected_top5_prob = [golden_scores[idx] for idx in expected_top5]

    model = ModelWrapper(finn_onnx)
    for prep_cls in (InferShapes, FoldConstants, InsertTopK):
        model = model.transform(prep_cls())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.graph.node[-2].input[1])
    for post_cls in (
        absorb.AbsorbScalarMulAddIntoTopK,
        InferShapes,
        InferDataTypes,
        InferDataLayouts,
        GiveUniqueNodeNames,
        GiveUniqueParameterTensors,
        GiveReadableTensorNames,
    ):
        model = model.transform(post_cls())
    model.save(build_dir + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # prepend the preprocessing graph
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(build_dir + "/quant_mobilenet_v1_4b.onnx")

    exec_ctx = oxe.execute_onnx(model, {model.graph.input[0].name: img_np}, True)
    produced = exec_ctx[model.graph.output[0].name]
    # rescale the TopK values by the multiplier that was absorbed
    produced_prob = exec_ctx["TopK_0_out0"] * a0
    assert (produced.flatten() == expected_top5).all()
    assert np.isclose(produced_prob.flatten(), expected_top5_prob).all()
def test_brevitas_compare_exported_mobilenet():
    """Validate exported MobileNet-v1 against Brevitas on ImageNet images.

    Skipped unless ``IMAGENET_VAL_PATH`` is set. Each image is run through
    both Brevitas and the merged ONNX model, the per-image top-5 comparison is
    logged to ``mobilenet_validation.csv`` in the build directory, and the
    test fails if any image disagrees in indices or values.
    """
    if "IMAGENET_VAL_PATH" not in os.environ:
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    debug_mode = False
    build_dir = make_build_dir("test_brevitas_mobilenet-v1_")

    # export preprocessing
    preproc_onnx = build_dir + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    for tidy_cls in (
        InferShapes,
        GiveUniqueNodeNames,
        GiveUniqueParameterTensors,
        GiveReadableTensorNames,
    ):
        preproc_model = preproc_model.transform(tidy_cls())

    # export the actual MobileNet-v1
    finn_onnx = build_dir + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        # only defined (and only used further down) in debug mode
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    model = ModelWrapper(finn_onnx)
    for prep_cls in (InferShapes, FoldConstants, RemoveStaticGraphInputs, InsertTopK):
        model = model.transform(prep_cls())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    for post_cls in (
        absorb.AbsorbScalarMulAddIntoTopK,
        InferShapes,
        InferDataTypes,
        InferDataLayouts,
        GiveUniqueNodeNames,
        GiveUniqueParameterTensors,
        GiveReadableTensorNames,
    ):
        model = model.transform(post_cls())
    model.save(build_dir + "/quant_mobilenet_v1_4b_wo_preproc.onnx")

    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(build_dir + "/quant_mobilenet_v1_4b.onnx")

    report_path = build_dir + "/mobilenet_validation.csv"
    with open(report_path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        header = [
            "goldenID",
            "brevitasTop5",
            "brevitasTop5[%]",
            "finnTop5",
            "finnTop5[%]",
            "top5equal",
            "top5%equal",
        ]
        writer.writerow(header)
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for img_path, target_id in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # reference result from the PyTorch/Brevitas forward pass
            golden_scores = (
                mobilenet.forward(preproc.forward(img_torch))
                .detach()
                .numpy()
                .flatten()
            )
            # indices of the five largest scores, best first
            expected_top5 = np.flip(np.argsort(golden_scores)[-5:])
            expected_top5_prob = [golden_scores[idx] for idx in expected_top5]

            exec_ctx = oxe.execute_onnx(
                model,
                {model.graph.input[0].name: img_np},
                return_full_exec_context=True,
            )
            produced = exec_ctx[model.graph.output[0].name]
            # rescale the TopK values by the multiplier that was absorbed
            produced_prob = exec_ctx["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            row = [
                str(target_id),
                str(expected_top5),
                str(expected_top5_prob),
                str(produced.flatten()),
                str(produced_prob.flatten()),
                str(inds_ok),
                str(probs_ok),
            ]
            writer.writerow(row)
            # flush per image so partial results survive an aborted run
            csvfile.flush()
            if debug_mode and not (inds_ok and probs_ok):
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                shared_names = set(dbg_hook.values.keys()).intersection(
                    set(exec_ctx.keys())
                )
                for dbg_name in shared_names:
                    tensors_match = np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        exec_ctx[dbg_name],
                        atol=1e-3,
                    ).all()
                    if not tensors_match:
                        print(
                            "Tensor %s differs between Brevitas and FINN" % dbg_name
                        )
        assert all_inds_ok and all_probs_ok
def apply(self, model):
    """Generate the PYNQ driver for a model and bundle its dependencies.

    Requires the ``vivado_pynq_proj`` metadata property to point at an
    existing directory. Fills the driver template with the normal, folded and
    packed input/output shapes and datatypes, writes it as ``driver.py`` into
    a fresh build directory (stored as the ``pynq_driver_dir`` metadata
    property), and copies ``src/finn/util`` and ``src/finn/core`` next to it.

    Returns ``(model, False)``.

    Raises ``Exception`` if no PYNQ project directory is found.
    """
    pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
    if pynq_proj is None or not os.path.isdir(pynq_proj):
        raise Exception("No PYNQ project found, apply MakePYNQProject first.")

    # fresh build directory for the generated driver
    driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", driver_dir)

    # shapes and datatypes of the graph's first input and output
    # TODO convert this to an analysis pass
    in_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    in_shape_normal = tuple(model.get_tensor_shape(in_name))
    out_shape_normal = tuple(model.get_tensor_shape(out_name))
    in_dt = model.get_tensor_datatype(in_name)
    out_dt = model.get_tensor_datatype(out_name)

    # folded shapes come from the custom ops at the graph boundaries
    head_op = getCustomOp(model.find_consumer(in_name))
    tail_op = getCustomOp(model.find_producer(out_name))
    in_shape_folded = tuple(head_op.get_folded_input_shape())
    out_shape_folded = tuple(tail_op.get_folded_output_shape())

    # packed shapes are obtained by packing dummy folded tensors
    in_dummy_folded = gen_finn_dt_tensor(in_dt, in_shape_folded)
    out_dummy_folded = gen_finn_dt_tensor(out_dt, out_shape_folded)
    in_shape_packed = finnpy_to_packed_bytearray(in_dummy_folded, in_dt).shape
    out_shape_packed = finnpy_to_packed_bytearray(out_dummy_folded, out_dt).shape

    def shape_string(shape, batch_var_name="N"):
        # for a shape like (1, ...) emit a string (N, ...) where N is the
        # default value for batch_var_name; this lets the driver work with
        # a batch of samples at once
        text = str(shape)
        for opener in ("(", "["):
            text = text.replace(opener + "1,", "%s%s," % (opener, batch_var_name))
        return text

    # placeholder -> value, applied in this order
    substitutions = (
        ("$INPUT_FINN_DATATYPE$", str(in_dt)),
        ("$INPUT_SHAPE_NORMAL$", shape_string(in_shape_normal)),
        ("$INPUT_SHAPE_FOLDED$", shape_string(in_shape_folded)),
        ("$INPUT_SHAPE_PACKED$", shape_string(in_shape_packed)),
        ("$OUTPUT_FINN_DATATYPE$", str(out_dt)),
        ("$OUTPUT_SHAPE_NORMAL$", shape_string(out_shape_normal)),
        ("$OUTPUT_SHAPE_FOLDED$", shape_string(out_shape_folded)),
        ("$OUTPUT_SHAPE_PACKED$", shape_string(out_shape_packed)),
    )
    driver_text = templates.pynq_driver_template
    for placeholder, value in substitutions:
        driver_text = driver_text.replace(placeholder, value)

    with open(driver_dir + "/driver.py", "w") as driver_file:
        driver_file.write(driver_text)

    # copy all the dependencies into the driver folder
    for package in ("util", "core"):
        shutil.copytree(
            get_finn_root() + "/src/finn/" + package,
            driver_dir + "/finn/" + package,
        )
    return (model, False)
def pyverilate_stitched_ip(
    model,
    read_internal_signals=True,
    disable_common_warnings=True,
    extra_verilator_args=None,
):
    """Given a model with stitched IP, return a PyVerilator sim object.
    Trace depth is also controllable, see get_rtlsim_trace_depth()

    :param read_internal_signals  If set, it will be possible to examine the
      internal (not only port) signals of the Verilog module, but this may
      slow down compilation and emulation.
    :param disable_common_warnings If set, disable the set of warnings that
      Vivado-HLS-generated Verilog typically triggers in Verilator
      (which can be very verbose otherwise)
    :param extra_verilator_args Optional list of additional arguments that is
      appended to the Verilator arguments built here; None (the default)
      means no extra arguments.

    Raises ImportError if PyVerilator is not available.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")
    if extra_verilator_args is None:
        # avoid a shared mutable default argument
        extra_verilator_args = []

    vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
        all_verilog_srcs = f.read().split()

    def file_to_basename(x):
        return os.path.basename(os.path.realpath(x))

    top_module_file_name = file_to_basename(
        model.get_metadata_prop("wrapper_filename")
    )
    # Drop only a trailing ".v" extension. str.strip(".v") would remove any
    # leading/trailing "." and "v" characters and so corrupt module names
    # that start or end with "v".
    if top_module_file_name.endswith(".v"):
        top_module_name = top_module_file_name[: -len(".v")]
    else:
        top_module_name = top_module_file_name
    build_dir = make_build_dir("pyverilator_ipstitched_")

    # dump all Verilog code to a single file
    # this is because large models with many files require
    # a verilator command line too long for bash on most systems
    # NOTE: there are duplicates in this list, and some files
    # are identical but in multiple directories (regslice_core.v)

    # remove duplicates from list by doing list -> set -> list
    all_verilog_files = list(
        set(filter(lambda x: x.endswith(".v"), all_verilog_srcs))
    )

    # remove all but one instances of regslice_core.v
    filtered_verilog_files = []
    seen_regslice_core = False
    for vfile in all_verilog_files:
        if "regslice_core" in vfile:
            if seen_regslice_core:
                continue
            seen_regslice_core = True
        filtered_verilog_files.append(vfile)

    # concatenate all verilog code into a single file
    with open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf:
        for vfile in filtered_verilog_files:
            with open(vfile) as rf:
                wf.write("//Added from " + vfile + "\n\n")
                wf.write(rf.read())

    verilator_args = []
    # disable common verilator warnings that should be harmless but commonly occur
    # in large quantities for Vivado HLS-generated verilog code
    if disable_common_warnings:
        verilator_args += [
            "-Wno-STMTDLY",
            "-Wno-PINMISSING",
            "-Wno-IMPLICIT",
            "-Wno-WIDTH",
            "-Wno-COMBDLY",
        ]
    # force inlining of all submodules to ensure we can read internal signals properly
    if read_internal_signals:
        verilator_args += ["--inline-mult", "0"]

    sim = PyVerilator.build(
        top_module_file_name,
        verilog_path=[vivado_stitch_proj_dir],
        build_dir=build_dir,
        trace_depth=get_rtlsim_trace_depth(),
        top_module_name=top_module_name,
        auto_eval=False,
        read_internal_signals=read_internal_signals,
        extra_args=verilator_args + extra_verilator_args,
    )
    return sim
def apply(self, model):
    """Move fpgadataflow nodes into StreamingDataflowPartition nodes.

    Partition ids are processed in ascending order starting at 0. For each id
    the matching fpgadataflow nodes are copied into their own model, which is
    saved as ``df_model.onnx`` in a fresh build directory, and replaced in the
    parent graph by one ``StreamingDataflowPartition`` node referencing that
    file. The loop stops at the first id for which no node matches.

    Returns ``(model, False)``.

    Raises ``AssertionError`` if the nodes of a partition disagree on ``slr``
    or more than one of them has a non-empty ``mem_port``.
    """
    partition_id = 0
    # we currently assume that all dataflow nodes belonging to the same partition
    # are connected to each other and there is a single input/output to/from each.
    # NOTE: all dataflow nodes with no partition_id set are moved to partition 0
    # TODO: check the assumption and/or improve this.
    while True:
        graph_nodes = list(model.graph.node)

        def in_current_partition(candidate):
            # fpgadataflow node, not yet a partition node, with either no
            # partition_id or the id currently being processed
            backend = get_by_name(candidate.attribute, "backend")
            if backend is None:
                return False
            if backend.s.decode("UTF-8") != "fpgadataflow":
                return False
            pid_attr = get_by_name(candidate.attribute, "partition_id")
            if pid_attr is not None and pid_attr.i != partition_id:
                return False
            return candidate.op_type != "StreamingDataflowPartition"

        df_nodes = [n for n in graph_nodes if in_current_partition(n)]
        other_nodes = [n for n in graph_nodes if n not in df_nodes]

        if not df_nodes:
            # no changes if no dataflow nodes are present
            break

        # partition the model into two models
        df_model = copy.deepcopy(model)
        parent_model = model
        # remove all non-dataflow nodes from the dataflow model
        for unwanted in other_nodes:
            df_model.graph.node.remove(unwanted)

        # identify the entry and exit points for the dataflow part
        df_in = df_model.graph.node[0].input[0]
        df_out = df_model.graph.node[-1].output[0]
        df_in_vi = df_model.get_tensor_valueinfo(df_in)
        df_out_vi = df_model.get_tensor_valueinfo(df_out)
        # set df graph in/out to be df_in/df_out
        df_model.graph.input.remove(df_model.graph.input[0])
        df_model.graph.input.insert(0, df_in_vi)
        df_model.graph.output.remove(df_model.graph.output[0])
        df_model.graph.output.insert(0, df_out_vi)

        def has_external_weights(candidate):
            # StreamingFCLayer whose mem_mode attribute is "external"
            if candidate.op_type != "StreamingFCLayer_Batch":
                return False
            mem_mode = get_by_name(candidate.attribute, "mem_mode")
            return mem_mode is not None and mem_mode.s.decode("UTF-8") == "external"

        # external weight tensors become extra graph inputs right after df_in
        extra_df_inputs = []
        extw_nodes = [n for n in df_nodes if has_external_weights(n)]
        for position, fc_node in enumerate(extw_nodes):
            weight_name = fc_node.input[1]
            weight_vi = df_model.get_tensor_valueinfo(weight_name)
            df_model.graph.input.insert(position + 1, weight_vi)
            extra_df_inputs.append(weight_name)

        # save model
        df_model_dir = make_build_dir(
            "dataflow_partition" + str(partition_id) + "_"
        )
        df_model_filename = df_model_dir + "/df_model.onnx"
        df_model.cleanup()
        df_model.save(df_model_filename)

        # keep track of where the dataflow part starts
        df_start_ind = graph_nodes.index(df_nodes[0])

        # get and check floorplan
        slr = getCustomOp(df_nodes[0]).get_nodeattr("slr")
        for member in df_nodes[1:]:
            member_slr = getCustomOp(member).get_nodeattr("slr")
            assert (
                slr == member_slr
            ), "all nodes with same partition_id must have the same slr id"

        # check that there is only one non-null mem_port per partition
        used_ports = []
        for member in df_nodes:
            port = getCustomOp(member).get_nodeattr("mem_port")
            if port is not None and port != "":
                used_ports.append(port)
        nmemports = len(used_ports)
        mem_port = used_ports[-1] if used_ports else ""
        assert nmemports <= 1, "too many memory ports per partition"

        # remove all dataflow nodes from the non-dataflow model
        for member in df_nodes:
            parent_model.graph.node.remove(member)

        # create StreamingDataflow node with df_in/df_out io
        df_node = helper.make_node(
            "StreamingDataflowPartition",
            [df_in] + extra_df_inputs,
            [df_out],
            # use the model attribute to mark the df model
            model=df_model_filename,
            domain="finn.custom_op.fpgadataflow",
            partition_id=partition_id,
            slr=slr,
            mem_port=mem_port,
        )
        parent_model.graph.node.insert(df_start_ind, df_node)
        model = parent_model
        partition_id += 1

    return (model, False)
def apply(self, model):
    """Replace groups of nodes by GenericPartition nodes.

    self.partitioning maps each node to a partition id; id -1 is ignored.
    For every other id, the matching nodes are copied into a child model
    saved as partition_<id>.onnx under self.partition_dir, and replaced in
    the parent graph by one GenericPartition node pointing at that file.

    Returns a (model, False) tuple.
    """
    # collect the partition ids that have to be created
    initial_nodes = list(model.graph.node)
    wanted_ids = {self.partitioning(n) for n in initial_nodes}
    wanted_ids.discard(-1)

    # make sure there is a directory for the generated .onnx files
    if self.partition_dir is not None:
        pathlib.Path(self.partition_dir).mkdir(parents=True, exist_ok=True)
    else:
        self.partition_dir = make_build_dir("partitioning_")

    for partition_id in wanted_ids:
        graph_nodes = list(model.graph.node)
        members = [n for n in graph_nodes if self.partitioning(n) == partition_id]
        outsiders = [n for n in graph_nodes if n not in members]

        # the deep copy becomes the partition model, the original the parent
        p_model = copy.deepcopy(model)
        parent = model
        for outsider in outsiders:
            p_model.graph.node.remove(outsider)

        # work out which tensors cross the partition boundary
        p_in = []
        p_out = []
        p_start_ind = 0
        for node in p_model.graph.node:
            for in_tensor in node.input:
                # an input with neither initializer nor producer inside the
                # partition model must come from outside
                is_initialized = any(
                    init.name == in_tensor for init in p_model.graph.initializer
                )
                is_produced = p_model.find_producer(in_tensor) is not None
                if is_initialized or is_produced:
                    continue
                # the same tensor could feed multiple nodes within the
                # partition (e.g. for residual connections): no duplicates
                if in_tensor not in p_in:
                    p_in.append(in_tensor)
                # keep track of where this partition starts topologically
                if p_start_ind == 0:
                    p_start_ind = graph_nodes.index(node)
            for out_tensor in node.output:
                # partition output = top-level output, or consumed by a
                # node outside of this partition
                is_graph_output = any(
                    out.name == out_tensor for out in model.graph.output
                )
                if is_graph_output:
                    if out_tensor not in p_out:
                        p_out.append(out_tensor)
                    continue
                for consumer in model.find_consumers(out_tensor):
                    if self.partitioning(consumer) == partition_id:
                        continue
                    if out_tensor not in p_out:
                        p_out.append(out_tensor)

        p_in_vi = [p_model.get_tensor_valueinfo(name) for name in p_in]
        p_out_vi = [p_model.get_tensor_valueinfo(name) for name in p_out]

        # legality check: walk backwards from the partition inputs, none of
        # the upstream nodes may belong to this partition (no cycles)
        frontier = [model.find_producer(name) for name in p_in]
        while frontier:
            upcoming = []
            for upstream in frontier:
                if upstream is None:
                    continue
                assert (
                    self.partitioning(upstream) != partition_id
                ), """cycle-free graph violated: partition depends on itself"""
                earlier = model.find_direct_predecessors(upstream)
                if earlier is not None:
                    upcoming.extend(earlier)
            frontier = upcoming

        # the partition model's graph inputs/outputs become p_in/p_out
        del p_model.graph.input[:]
        p_model.graph.input.extend(p_in_vi)
        del p_model.graph.output[:]
        p_model.graph.output.extend(p_out_vi)

        # inputs/outputs must not be listed as value_info a second time
        for boundary_vi in p_in_vi + p_out_vi:
            if boundary_vi in p_model.graph.value_info:
                p_model.graph.value_info.remove(boundary_vi)

        # store the partition model
        p_model_filename = (
            self.partition_dir + "/partition_" + str(partition_id) + ".onnx"
        )
        p_model.cleanup()
        p_model.save(p_model_filename)

        # the GenericPartition node goes in first, the replaced nodes are
        # removed afterwards so that p_start_ind is still valid
        placeholder = helper.make_node(
            "GenericPartition",
            p_in,
            p_out,
            name="GenericPartition_" + str(partition_id),
            # use the model attribute to mark the partition model
            model=p_model_filename,
            domain="finn.custom_op.general",
        )
        parent.graph.node.insert(p_start_ind, placeholder)
        for member in members:
            parent.graph.node.remove(member)

        model = parent

    return (model, False)
def apply(self, model):
    """Generate and run a Vivado project linking all partitions into a shell.

    Every node in the graph must be a StreamingDataflowPartition whose child
    model carries the "vivado_stitch_proj", "vivado_stitch_vlnv", "clk_ns"
    and "vivado_stitch_ifnames" metadata properties. For each node a block
    design cell is instantiated and wired up; nodes without producer or
    without consumers are treated as input/output DMA kernels. The resulting
    Tcl is written to a fresh build directory, Vivado is invoked through a
    shell script, and the produced bitfile / hardware handoff file are copied
    and recorded as model metadata ("vivado_pynq_proj", "bitfile",
    "hw_handoff", "vivado_synth_rpt").

    Returns a (model, False) tuple.

    Raises:
        AssertionError: if a node is not a StreamingDataflowPartition or a
            DMA node does not have exactly one AXI lite interface.
        Exception: if a child model has no stitched IP / vlnv, or if the
            bitfile or hardware handoff file is missing after synthesis.
    """
    # Tcl commands for the block design
    config = []
    idma_idx = 0
    odma_idx = 0
    aximm_idx = 0
    axilite_idx = 0
    global_clk_ns = 0
    instance_names = {}
    for node in model.graph.node:
        assert node.op_type == "StreamingDataflowPartition", "Invalid link graph"
        sdp_node = getCustomOp(node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        kernel_model = ModelWrapper(dataflow_model_filename)

        ipstitch_path = kernel_model.get_metadata_prop("vivado_stitch_proj")
        if ipstitch_path is None or (not os.path.isdir(ipstitch_path)):
            raise Exception(
                "No stitched IPI design found for %s, apply CreateStitchedIP first."
                % node.name
            )

        vivado_stitch_vlnv = kernel_model.get_metadata_prop("vivado_stitch_vlnv")
        if vivado_stitch_vlnv is None:
            raise Exception(
                "No vlnv found for %s, apply CreateStitchedIP first." % node.name
            )

        # make the IP repositories of this kernel known to the project
        ip_dirs = ["list"]
        ip_dirs += collect_ip_dirs(kernel_model, ipstitch_path)
        ip_dirs_str = "[%s]" % (" ".join(ip_dirs))
        config.append(
            "set_property ip_repo_paths "
            "[concat [get_property ip_repo_paths [current_project]] %s] "
            "[current_project]" % ip_dirs_str
        )
        config.append("update_ip_catalog -rebuild -scan_changes")

        # the largest clk_ns over all kernels determines the clock frequency
        clk_ns = float(kernel_model.get_metadata_prop("clk_ns"))
        if clk_ns > global_clk_ns:
            global_clk_ns = clk_ns

        # NOTE(review): eval() executes whatever is stored in the model
        # metadata; this is only acceptable for models from a trusted source.
        ifnames = eval(kernel_model.get_metadata_prop("vivado_stitch_ifnames"))

        # gather info on connectivity
        # assume each node connected to outputs/inputs is DMA:
        # has axis, aximm and axilite
        # everything else is axis-only
        # assume only one connection from each ip to the next
        # all aximm allocated to DDR[0]
        # all kernels allocated to SLR0
        producer = model.find_producer(node.input[0])
        consumer = model.find_consumers(node.output[0])
        # define kernel instances
        # name kernels connected to graph inputs as idmaxx
        # name kernels connected to graph outputs as odmaxx
        if producer is None or consumer is None:
            # input and output DMAs are numbered independently so that
            # every instance name is unique
            if producer is None:
                instance_names[node.name] = "idma" + str(idma_idx)
                idma_idx += 1
            else:
                instance_names[node.name] = "odma" + str(odma_idx)
                odma_idx += 1
            config.append(
                "create_bd_cell -type ip -vlnv %s %s"
                % (vivado_stitch_vlnv, instance_names[node.name])
            )
            config.append(
                "connect_bd_intf_net [get_bd_intf_pins %s/m_axi_gmem0] "
                "[get_bd_intf_pins smartconnect_0/S%02d_AXI]"
                % (instance_names[node.name], aximm_idx)
            )
            assert (
                len(ifnames["axilite"]) == 1
            ), "Must have 1 AXI lite interface on IODMA nodes"
            axilite_intf_name = ifnames["axilite"][0]
            assert axilite_intf_name is not None
            config.append(
                "connect_bd_intf_net [get_bd_intf_pins %s/%s] "
                "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
                % (instance_names[node.name], axilite_intf_name, axilite_idx)
            )
            aximm_idx += 1
            axilite_idx += 1
        else:
            instance_names[node.name] = node.name
            config.append(
                "create_bd_cell -type ip -vlnv %s %s"
                % (vivado_stitch_vlnv, instance_names[node.name])
            )
            for axilite_intf_name in ifnames["axilite"]:
                config.append(
                    "connect_bd_intf_net [get_bd_intf_pins %s/%s] "
                    "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
                    % (instance_names[node.name], axilite_intf_name, axilite_idx)
                )
                axilite_idx += 1

        # clock and reset are shared with the smartconnect
        config.append(
            "connect_bd_net [get_bd_pins %s/ap_clk] "
            "[get_bd_pins smartconnect_0/aclk]" % instance_names[node.name]
        )
        config.append(
            "connect_bd_net [get_bd_pins %s/ap_rst_n] "
            "[get_bd_pins smartconnect_0/aresetn]" % instance_names[node.name]
        )

        # connect streams: every input that has a producer is wired to the
        # matching output stream of that producer
        if producer is not None:
            for i, in_tensor in enumerate(node.input):
                in_producer = model.find_producer(in_tensor)
                if in_producer is not None:
                    j = list(in_producer.output).index(in_tensor)
                    config.append(
                        "connect_bd_intf_net [get_bd_intf_pins %s/s_axis_%d] "
                        "[get_bd_intf_pins %s/m_axis_%d]"
                        % (
                            instance_names[node.name],
                            i,
                            instance_names[in_producer.name],
                            j,
                        )
                    )

    # create a temporary folder for the project
    vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_")
    model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir)

    fclk_mhz = int(1 / (global_clk_ns * 0.001))

    # create a TCL recipe for the project
    ipcfg = vivado_pynq_proj_dir + "/ip_config.tcl"
    config = "\n".join(config) + "\n"
    with open(ipcfg, "w") as f:
        f.write(
            templates.custom_zynq_shell_template
            % (
                fclk_mhz,
                axilite_idx,
                aximm_idx,
                self.platform,
                pynq_part_map[self.platform],
                config,
                self.enable_debug,
            )
        )

    # create a shell script that runs Vivado on the TCL recipe
    synth_project_sh = vivado_pynq_proj_dir + "/synth_project.sh"
    working_dir = os.environ["PWD"]
    with open(synth_project_sh, "w") as f:
        f.write("#!/bin/bash \n")
        f.write("cd {}\n".format(vivado_pynq_proj_dir))
        f.write("vivado -mode batch -source %s\n" % ipcfg)
        f.write("cd {}\n".format(working_dir))

    # call the synthesis script
    bash_command = ["bash", synth_project_sh]
    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
    process_compile.communicate()

    bitfile_name = (
        vivado_pynq_proj_dir + "/finn_zynq_link.runs/impl_1/top_wrapper.bit"
    )
    if not os.path.isfile(bitfile_name):
        raise Exception("Synthesis failed, no bitfile found")
    deploy_bitfile_name = vivado_pynq_proj_dir + "/resizer.bit"
    copy(bitfile_name, deploy_bitfile_name)
    # set bitfile attribute
    model.set_metadata_prop("bitfile", deploy_bitfile_name)

    hwh_name = (
        vivado_pynq_proj_dir
        + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh"
    )
    if not os.path.isfile(hwh_name):
        raise Exception("Synthesis failed, no hardware handoff file found")
    deploy_hwh_name = vivado_pynq_proj_dir + "/resizer.hwh"
    copy(hwh_name, deploy_hwh_name)
    model.set_metadata_prop("hw_handoff", deploy_hwh_name)

    # filename for the synth utilization report
    synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml"
    model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)
    return (model, False)
import finn.core.onnx_exec as oxe
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import (
    RemoveUnusedTensors,
    RemoveStaticGraphInputs,
    GiveReadableTensorNames,
    GiveUniqueNodeNames,
)
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline import Streamline
from finn.util.test import get_test_model_trained
from finn.util.basic import make_build_dir

# build directory shared by all parametrizations; created at import time
export_onnx_path = make_build_dir("test_streamline_cnv_")


# act bits
@pytest.mark.parametrize("abits", [1, 2])
# weight bits
@pytest.mark.parametrize("wbits", [1, 2])
# network topology / size
@pytest.mark.parametrize("size", ["CNV"])
def test_streamline_cnv(size, wbits, abits):
    """Export the trained test network for one size/wbits/abits combination.

    Combinations with wbits > abits are skipped. The network returned by
    get_test_model_trained is exported to <export_onnx_path>/<name>.onnx with
    input shape (1, 3, 32, 32).

    NOTE(review): as visible here the function only performs the export and
    asserts nothing; `pytest` and `bo` are not imported in the visible part
    of the file. Presumably this is an excerpt of a longer test; confirm.
    """
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    # e.g. "CNV_1W1A"
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 3, 32, 32), finn_onnx)
def pyverilate_stitched_ip(model, read_internal_signals=True):
    """Given a model with stitched IP, return a PyVerilator sim object.

    If read_internal_signals is True, it will be possible to examine the
    internal (not only port) signals of the Verilog module, but this may slow
    down compilation and emulation.
    Trace depth is also controllable, see get_rtlsim_trace_depth()

    The model must carry the "vivado_stitch_proj" and "wrapper_filename"
    metadata properties. All Verilog sources listed in
    <vivado_stitch_proj>/all_verilog_srcs.txt are concatenated into a single
    file inside the stitched-IP project directory, which is then handed to
    PyVerilator.

    Raises:
        ImportError: if PyVerilator is not available.
    """
    if PyVerilator is None:
        raise ImportError("Installation of PyVerilator is required.")

    vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
    with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
        all_verilog_srcs = f.read().split()

    def file_to_basename(x):
        return os.path.basename(os.path.realpath(x))

    top_module_file_name = file_to_basename(
        model.get_metadata_prop("wrapper_filename")
    )
    # Drop only the ".v" extension. str.strip(".v") would remove every
    # leading/trailing "." and "v" character and mangle names such as
    # "vivado_wrapper.v".
    if top_module_file_name.endswith(".v"):
        top_module_name = top_module_file_name[: -len(".v")]
    else:
        top_module_name = top_module_file_name
    build_dir = make_build_dir("pyverilator_ipstitched_")

    # dump all Verilog code to a single file
    # this is because large models with many files require
    # a verilator command line too long for bash on most systems
    # NOTE: there are duplicates in this list, and some files
    # are identical but in multiple directories (regslice_core.v)

    # remove duplicates via a set; sort so that the generated file does not
    # depend on set iteration order
    all_verilog_files = sorted(
        {src for src in all_verilog_srcs if src.endswith(".v")}
    )

    # remove all but one instances of regslice_core.v
    filtered_verilog_files = []
    regslice_seen = False
    for vfile in all_verilog_files:
        if "regslice_core" in vfile:
            if regslice_seen:
                continue
            regslice_seen = True
        filtered_verilog_files.append(vfile)

    # concatenate all verilog code into a single file
    with open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf:
        for vfile in filtered_verilog_files:
            with open(vfile) as rf:
                wf.write("//Added from " + vfile + "\n\n")
                wf.write(rf.read())

    sim = PyVerilator.build(
        top_module_file_name,
        verilog_path=[vivado_stitch_proj_dir],
        build_dir=build_dir,
        trace_depth=get_rtlsim_trace_depth(),
        top_module_name=top_module_name,
        auto_eval=False,
        read_internal_signals=read_internal_signals,
    )
    return sim
def test_npy2apintstream(test_shape, dtype):
    """Round-trip a tensor through npy2apintstream / apintstream2npy.

    A small C++ program is generated, compiled with g++ and executed in a
    fresh build directory: it reads in.npy into an hls::stream and writes the
    stream back to out.npy. The test passes when out.npy equals the tensor
    that was saved as in.npy; the build directory is only deleted on success.
    """
    ndarray = cutil.gen_finn_dt_tensor(dtype, test_shape)
    test_dir = cutil.make_build_dir(prefix="test_npy2apintstream_")
    shape = ndarray.shape
    elem_bits = dtype.bitwidth()
    # one stream word holds the whole innermost dimension
    packed_hls_type = "ap_uint<%d>" % (shape[-1] * elem_bits)
    elem_hls_type = dtype.get_hls_datatype_str()
    npy_in = test_dir + "/in.npy"
    npy_out = test_dir + "/out.npy"
    # restrict the np datatypes we can handle
    npyt_to_ct = {
        "float32": "float",
        "float64": "double",
        "int8": "int8_t",
        "int32": "int32_t",
        "int64": "int64_t",
        "uint8": "uint8_t",
        "uint32": "uint32_t",
        "uint64": "uint64_t",
    }
    npy_type = npyt_to_ct[str(ndarray.dtype)]
    # Python tuple syntax -> C++ brace initializer
    shape_cpp_str = str(shape).replace("(", "{").replace(")", "}")

    # source of the C++ test application, one entry per line
    cpp_lines = [
        "#include <cstddef>",
        "#define AP_INT_MAX_W 4096",
        '#include "ap_int.h"',
        '#include "stdint.h"',
        '#include "hls_stream.h"',
        '#include "cnpy.h"',
        '#include "npy2apintstream.hpp"',
        "int main(int argc, char *argv[]) {",
        "hls::stream<%s> teststream;" % packed_hls_type,
        'npy2apintstream<%s, %s, %d, %s>("%s", teststream);'
        % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in),
        'apintstream2npy<%s, %s, %d, %s>(teststream, %s, "%s");'
        % (
            packed_hls_type,
            elem_hls_type,
            elem_bits,
            npy_type,
            shape_cpp_str,
            npy_out,
        ),
        "return 0;",
        "}",
    ]
    with open(test_dir + "/test.cpp", "w") as cpp_file:
        cpp_file.write("\n".join(cpp_lines))

    cmd_compile = """
g++ -o test_npy2apintstream test.cpp /workspace/cnpy/cnpy.cpp \
-I/workspace/cnpy/ -I{}/include -I/workspace/finn/src/finn/qnn-data/cpp \
--std=c++11 -lz""".format(
        os.environ["VIVADO_PATH"]
    )
    with open(test_dir + "/compile.sh", "w") as script_file:
        script_file.write(cmd_compile)

    # build the test application inside the build directory
    build_proc = subprocess.Popen(
        ["sh", "compile.sh"], stdout=subprocess.PIPE, cwd=test_dir
    )
    build_proc.communicate()

    # make copy before saving the array
    ndarray = ndarray.copy()
    np.save(npy_in, ndarray)

    # run it: reads in.npy, writes out.npy
    run_proc = subprocess.Popen(
        "./test_npy2apintstream", stdout=subprocess.PIPE, cwd=test_dir
    )
    run_proc.communicate()

    produced = np.load(npy_out)
    success = (produced == ndarray).all()
    # only delete generated code if test has passed
    # useful for debug otherwise
    if success:
        shutil.rmtree(test_dir)
    assert success
def apply(self, model):
    """Create and run a Vivado project that stitches all node IPs together.

    Every node must have domain "finn", backend "fpgadataflow" and an
    existing IP directory below its "ipgen_path" attribute. Each node becomes
    a block design cell; the node without producer gets external clock,
    reset and input, the node without consumer (which must be a TLastMarker)
    gets an external output, all others are chained input-to-output. The
    block design is packaged as an IP core and a Verilog wrapper plus a list
    of all Verilog sources are exported. Project directory, vlnv and wrapper
    filename are recorded as model metadata.

    Returns a (model, False) tuple.
    """
    ip_dirs = ["list"]
    create_cmds = []
    connect_cmds = []
    # ensure that all nodes are fpgadataflow, and that IPs are generated
    for node in model.graph.node:
        assert node.domain == "finn", 'Node domain is not set to "finn"'
        backend_attr = get_by_name(node.attribute, "backend")
        assert backend_attr is not None, "Backend node attribute is not set."
        assert (
            backend_attr.s.decode("UTF-8") == "fpgadataflow"
        ), """Backend node attribute is not set to "fpgadataflow"."""
        ipgen_attr = get_by_name(node.attribute, "ipgen_path")
        assert (
            ipgen_attr is not None
        ), """Node attribute "ipgen_path" is not set. Please run transformation CodeGen_ipgen first."""
        ip_dir = ipgen_attr.s.decode("UTF-8") + "/sol1/impl/ip"
        assert os.path.isdir(ip_dir), "IP generation directory doesn't exist."
        ip_dirs.append(ip_dir)

        inst_name = node.name
        vlnv = "xilinx.com:hls:%s:1.0" % node.name
        create_cmds.append("create_bd_cell -type ip -vlnv %s %s" % (vlnv, inst_name))

        # TODO nonlinear topologies: check this for all inputs
        upstream = model.find_producer(node.input[0])
        if upstream is not None:
            # intermediate node
            # wire up global clock and reset
            connect_cmds.append(
                "connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins %s/ap_rst_n]"
                % inst_name
            )
            connect_cmds.append(
                "connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins %s/ap_clk]"
                % inst_name
            )
            # wire up input to previous output
            # TODO nonlinear topologies: loop over all inputs
            sink_pin = "%s/in0_V_V" % (inst_name)
            source_pin = "%s/out_V_V" % (upstream.name)
            connect_cmds.append(
                "connect_bd_intf_net [get_bd_intf_pins %s] [get_bd_intf_pins %s]"
                % (source_pin, sink_pin)
            )
        else:
            # first node in graph
            # make clock and reset external
            connect_cmds.append(
                "make_bd_pins_external [get_bd_pins %s/ap_clk]" % inst_name
            )
            connect_cmds.append(
                "make_bd_pins_external [get_bd_pins %s/ap_rst_n]" % inst_name
            )
            # make input external
            connect_cmds.append(
                "make_bd_intf_pins_external [get_bd_intf_pins %s/in0_V_V]"
                % inst_name
            )

        if model.find_consumer(node.output[0]) is None:
            # last node in graph
            # ensure it is a TLastMarker to have a valid TLast signal
            assert (
                node.op_type == "TLastMarker"
            ), """Last node is not TLastMarker. Please run transformation InsertTLastMarker to ensure a valid TLast signal"""
            # make output external
            connect_cmds.append(
                "make_bd_intf_pins_external [get_bd_intf_pins %s/out_r]" % inst_name
            )

    # create a temporary folder for the project
    prjname = "finn_vivado_stitch_proj"
    proj_dir = make_build_dir(prefix="vivado_stitch_proj_")
    model.set_metadata_prop("vivado_stitch_proj", proj_dir)

    # start building the tcl script: project creation, IP repositories
    tcl = [
        "create_project %s %s -part %s" % (prjname, proj_dir, self.fpgapart),
        "set_property ip_repo_paths [%s] [current_project]" % " ".join(ip_dirs),
        "update_ip_catalog",
    ]

    # create block design and instantiate all layers
    block_name = "finn_design"
    tcl.append('create_bd_design "%s"' % block_name)
    tcl += create_cmds
    tcl += connect_cmds
    tcl += ["regenerate_bd_layout", "validate_bd_design", "save_bd_design"]

    # export block design itself as an IP core
    block_vendor = "xilinx_finn"
    block_library = "finn"
    block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name)
    model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv)
    tcl.append(
        (
            "ipx::package_project -root_dir %s/ip -vendor %s "
            "-library %s -taxonomy /UserIP -module %s -import_files"
        )
        % (proj_dir, block_vendor, block_library, block_name)
    )
    tcl += [
        "set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv,
        "ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv,
        "ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv,
        "ipx::save_core [ipx::find_open_core %s]" % block_vlnv,
    ]

    # create wrapper hdl (for rtlsim later on)
    bd_base = "%s/%s.srcs/sources_1/bd/%s" % (proj_dir, prjname, block_name)
    bd_filename = "%s/%s.bd" % (bd_base, block_name)
    wrapper_filename = "%s/hdl/%s_wrapper.v" % (bd_base, block_name)
    tcl.append("make_wrapper -files [get_files %s] -top" % bd_filename)
    tcl.append("add_files -norecurse %s" % wrapper_filename)
    model.set_metadata_prop("wrapper_filename", wrapper_filename)

    # export list of used Verilog files (for rtlsim later on)
    v_file_list = "%s/all_verilog_srcs.txt" % proj_dir
    tcl += [
        "set all_v_files [get_files -filter {FILE_TYPE == Verilog}]",
        "set fp [open %s w]" % v_file_list,
        "puts $fp $all_v_files",
        "close $fp",
    ]

    # write the project creator tcl script
    with open(proj_dir + "/make_project.tcl", "w") as tcl_file:
        tcl_file.write("\n".join(tcl) + "\n")

    # create a shell script and call Vivado
    make_project_sh = proj_dir + "/make_project.sh"
    working_dir = os.environ["PWD"]
    with open(make_project_sh, "w") as sh_file:
        sh_file.write("#!/bin/bash \n")
        sh_file.write("cd {}\n".format(proj_dir))
        sh_file.write("vivado -mode batch -source make_project.tcl\n")
        sh_file.write("cd {}\n".format(working_dir))
    vivado_proc = subprocess.Popen(
        ["bash", make_project_sh], stdout=subprocess.PIPE
    )
    vivado_proc.communicate()
    return (model, False)