def test_end2end_cnv_w1a1_fold_and_tlastmarker():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # each tuple is (PE, SIMD, in_fifo_depth) for a layer
    folding = [
        (16, 3, 128),
        (32, 32, 128),
        (16, 32, 128),
        (16, 32, 128),
        (4, 32, 81),
        (1, 32, 2),
        (1, 4, 2),
        (1, 8, 128),
        (5, 1, 3),
    ]
    for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
        fcl_inst = getCustomOp(fcl)
        fcl_inst.set_nodeattr("PE", pe)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("inFIFODepth", ififodepth)

    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for i in range(len(swg_layers)):
        swg_inst = getCustomOp(swg_layers[i])
        simd = folding[i][1]
        swg_inst.set_nodeattr("SIMD", simd)

    model = model.transform(InsertDWC())
    model = model.transform(InsertFIFO())
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateResources("estimate"))
    model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
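# A minimal sketch (hypothetical helper, not part of the test suite) of the
# constraint the folding factors above must satisfy: for every
# StreamingFCLayer_Batch, PE must evenly divide the matrix height (MH) and
# SIMD must evenly divide the matrix width (MW), otherwise the folding is
# invalid.
def assert_folding_is_valid(model):
    for fcl in model.get_nodes_by_op_type("StreamingFCLayer_Batch"):
        inst = getCustomOp(fcl)
        mh = inst.get_nodeattr("MH")
        mw = inst.get_nodeattr("MW")
        pe = inst.get_nodeattr("PE")
        simd = inst.get_nodeattr("SIMD")
        assert mh % pe == 0, "PE=%d must divide MH=%d" % (pe, mh)
        assert mw % simd == 0, "SIMD=%d must divide MW=%d" % (simd, mw)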
def test_end2end_cnv_w1a1_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results with the model deployed on the PYNQ board
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()
        assert np.argmax(y) == 3
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
    """This step only executes if QONNX nodes are found. These include the
    following op_types: "Quant", "Trunc" and "BinaryQuant". If such nodes are
    found, the step runs the QONNX tidy-up transformations and then converts
    the QONNX model to the FINN-ONNX dialect.
    """
    # check if any QONNX nodes exist, i.e. BinaryQuant, Quant or Trunc
    q_count = 0
    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
        q_count += len(model.get_nodes_by_op_type(op_type))
    if q_count == 0:
        return model

    # QONNX cleanup
    model = cleanup_model(model)
    # QONNX to FINN-ONNX
    model = model.transform(
        ConvertQONNXtoFINN(
            filter_function=default_filter_function_generator(
                max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
            )
        )
    )

    if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)

    return model
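# A minimal usage sketch for the step above, assuming a Brevitas/QONNX export
# on disk. "model_qonnx.onnx" is a hypothetical stand-in, and the exact set of
# required DataflowBuildConfig fields may differ between FINN versions.
def example_run_qonnx_step():
    cfg = DataflowBuildConfig(
        output_dir="build_out",
        synth_clk_period_ns=10.0,  # assumed required field
        generate_outputs=[],  # no hardware outputs needed for this step
    )
    model = ModelWrapper("model_qonnx.onnx")  # hypothetical input file
    # returns the model unchanged if no Quant/Trunc/BinaryQuant nodes exist
    return step_qonnx_to_finn(model, cfg)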
def test_end2end_tfc_w1a2_run_on_pynq():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # x = np.zeros(ishape, dtype=np.float32)
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        # produce results with the model deployed on the PYNQ board
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
def step_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert eligible nodes to `HLSCustomOp` subclasses that represent HLS
    layers. Which nodes and particular configurations can be converted to HLS
    is limited, see the source code of the `convert_to_hls` module for more."""
    mem_mode = cfg.default_mem_mode.value
    if cfg.standalone_thresholds:
        # doing this first causes all threshold layers to be standalone
        model = model.transform(to_hls.InferThresholdingLayer())
    # needed for bipolar MatMul layers
    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
    # needed for non-bipolar MatMul layers
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    # TopK to LabelSelect
    model = model.transform(to_hls.InferLabelSelectLayer())
    # input quantization (if any) as standalone threshold
    model = model.transform(to_hls.InferThresholdingLayer())
    # needed for convolutions -- TODO always exec?
    need_conv = len(model.get_nodes_by_op_type("Im2Col")) > 0
    if need_conv:
        model = model.transform(to_hls.InferConvInpGen())
        model = model.transform(to_hls.InferStreamingMaxPool())
        model = model.transform(RemoveCNVtoFCFlatten())
    # get rid of Transpose -> Transpose identity seq
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferDataLayouts())
    return model
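# A small illustrative check (hypothetical helper, assuming the HLS layers
# produced above live in the "finn.custom_op.fpgadataflow" domain): list the
# op_types the step left unconverted, which is useful when a topology only
# partially maps to HLS layers.
def report_unconverted_nodes(model):
    hls_domain = "finn.custom_op.fpgadataflow"
    return [n.op_type for n in model.graph.node if n.domain != hls_domain]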
def copy_onnx_model(parent_model_path, new_path, ip_src_path="/tmp/finn_dev_justin"):
    """Copies all IP into new_path, updates all paths in the child models,
    saves a copy of each child model to new_path, updates the
    StreamingDataflowPartition nodes in the parent model, and finally saves a
    copy of the updated parent model to new_path.

    IMPORTANT: all Verilog paths must be relative for this to work."""
    parent_model = ModelWrapper(parent_model_path)
    streaming_dataflow_partition_nodes = parent_model.get_nodes_by_op_type(
        "StreamingDataflowPartition"
    )
    num_child_models = len(streaming_dataflow_partition_nodes)
    list_of_new_child_model_paths = []
    for i in range(num_child_models):
        child_model_path = getCustomOp(
            streaming_dataflow_partition_nodes[i]
        ).get_nodeattr("model")
        child_model = ModelWrapper(child_model_path)
        # copy the IP into new_path and update child model paths
        new_child_model = copy_ip(child_model, new_path, ip_src_path)
        # save the new child model
        new_child_model_path = new_path + f"/child_{i}.onnx"
        new_child_model.save(new_child_model_path)
        list_of_new_child_model_paths.append(new_child_model_path)

    # update the parent model by attaching the new child model paths, then save
    new_parent_model = attach_child_models_to_parent_model(
        parent_model, list_of_new_child_model_paths
    )
    new_parent_model_path = new_path + "/parent.onnx"
    new_parent_model.save(new_parent_model_path)
    return new_parent_model_path
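# A minimal usage sketch for copy_onnx_model; all paths are hypothetical
# stand-ins for a build directory produced by an earlier FINN run.
def example_relocate_build():
    new_parent_path = copy_onnx_model(
        "/build/old/parent.onnx",
        "/build/relocated",
        ip_src_path="/tmp/finn_dev_justin",
    )
    # the returned path points at the updated parent model, whose
    # StreamingDataflowPartition nodes now reference the copied children
    return ModelWrapper(new_parent_path)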
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on the given model. Streamlining involves moving
    floating-point scale/shift parameters around, collapsing adjacent ones
    into a single parameter, then absorbing the scale/shift into the
    following `MultiThreshold` node. Streamlining requires careful topology
    design and cannot be applied to all topologies.
    """
    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0
    if need_lowering:
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())

    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "streamlined_python", need_parent=False)

    return model
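# An illustrative sketch (not part of the build flow) of the arithmetic that
# lets streamlining absorb a scale/shift into a MultiThreshold node: for a
# positive scale s, comparing s*x + b against thresholds t is equivalent to
# comparing x against the adjusted thresholds (t - b) / s. All names here are
# local to the demonstration.
def demo_threshold_absorption():
    x = np.random.randn(4)
    s, b = 0.5, 1.5  # scale and shift, with s > 0
    t = np.array([-1.0, 0.0, 1.0])  # original thresholds
    # MultiThreshold-style output: count of thresholds crossed per element
    before = (s * x[:, None] + b >= t).sum(axis=1)
    after = (x[:, None] >= (t - b) / s).sum(axis=1)  # absorbed thresholds
    assert (before == after).all()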
def test_const_folding_shapes():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    mm_node_w_in = model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert model.find_producer(mm_node_w_in) is not None
    assert model.find_producer(mm_node_w_in).op_type == "Reshape"
    assert model.get_initializer(mm_node_w_in) is None
    model = model.transform(FoldConstants())
    assert model.find_producer(mm_node_w_in) is None
    assert model.get_initializer(mm_node_w_in) is not None
def test_modelwrapper():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    assert model.check_all_tensor_shapes_specified() is True
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    conv_nodes = model.get_nodes_by_op_type("Conv")
    matmul_nodes = model.get_nodes_by_op_type("MatMul")
    assert len(conv_nodes) == 2
    assert len(matmul_nodes) == 1
    first_conv = conv_nodes[0]
    first_conv_iname = first_conv.input[0]
    first_conv_wname = first_conv.input[1]
    first_conv_oname = first_conv.output[0]
    assert first_conv_iname != "" and (first_conv_iname is not None)
    assert first_conv_wname != "" and (first_conv_wname is not None)
    assert first_conv_oname != "" and (first_conv_oname is not None)
    first_conv_weights = model.get_initializer(first_conv_wname)
    assert first_conv_weights.shape == (8, 1, 5, 5)
    first_conv_weights_rand = np.random.randn(8, 1, 5, 5)
    model.set_initializer(first_conv_wname, first_conv_weights_rand)
    assert (model.get_initializer(first_conv_wname) == first_conv_weights_rand).all()
    inp_cons = model.find_consumer(first_conv_iname)
    assert inp_cons == first_conv
    out_prod = model.find_producer(first_conv_oname)
    assert out_prod == first_conv
    inp_layout = model.get_tensor_layout(first_conv_iname)
    assert inp_layout is None
    inp_layout = DataLayout.NCHW
    model.set_tensor_layout(first_conv_iname, inp_layout)
    assert model.get_tensor_layout(first_conv_iname) == inp_layout
    inp_sparsity = model.get_tensor_sparsity(first_conv_iname)
    assert inp_sparsity is None
    inp_sparsity = {"dw": {"kernel_shape": [3, 3]}}
    model.set_tensor_sparsity(first_conv_iname, inp_sparsity)
    assert model.get_tensor_sparsity(first_conv_iname) == inp_sparsity
def test_linear_past_eltwise_add(ch, ifmdim):
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)

    model = make_model(input_tensor_shape)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    x1 = np.random.randn(*input_tensor_shape).astype(np.float32)
    x2 = np.random.randn(*input_tensor_shape).astype(np.float32)

    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x1, model.graph.input[1].name: x2}

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    expected_sum = 3.0 * ((x1 + x2) + 15.0)
    assert np.isclose(expected_sum, produced_sum, atol=1e-3).all()
    assert len(model.get_nodes_by_op_type("Add")) == 3
    assert len(model.get_nodes_by_op_type("Mul")) == 2

    model = model.transform(MoveLinearPastEltwiseAdd())

    # verify again, to check we didn't break anything
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    assert np.isclose(expected_sum, produced_sum, atol=1e-3).all()
    assert len(model.get_nodes_by_op_type("Add")) == 2
    assert len(model.get_nodes_by_op_type("Mul")) == 1

    os.remove(export_onnx_path)
def inference_cost(
    model_filename,
    *,
    output_json=None,
    output_onnx=None,
    preprocess=True,
    discount_sparsity=True,
):
    """Print the inference cost estimate metric for the given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after
        any preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        qnt_nodes = model.get_nodes_by_op_type("Quant")
        for qnt_node in qnt_nodes:
            qnt_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(GiveUniqueParameterTensors())
        model = model.transform(InferDataTypes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(RemoveStaticGraphInputs())
        model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    bops = compute_bops(ret)
    mem_w_bits = compute_mem_bits(ret, "mem_w")
    mem_o_bits = compute_mem_bits(ret, "mem_o")
    ret["total_bops"] = bops
    ret["total_mem_w_bits"] = mem_w_bits
    ret["total_mem_o_bits"] = mem_o_bits

    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))

    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
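# A minimal usage sketch; "model.onnx" and the output filenames below are
# hypothetical stand-ins.
def example_inference_cost():
    inference_cost(
        "model.onnx",
        output_json="inference_cost.json",
        output_onnx="model_preprocessed.onnx",
        preprocess=True,
        discount_sparsity=True,
    )
    # inference_cost.json then holds the per-op-type costs plus the aggregate
    # total_bops, total_mem_w_bits and total_mem_o_bits entries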
def test_end2end_cnv_w1a1_verify_all():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    # load one of the test vectors
    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    x = input_tensor
    # x = np.zeros(ishape, dtype=np.float32)
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    # produce results with cppsim
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
    y_cppsim = ret_cppsim[oname]
    # produce results with node-by-node rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
    )
    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
    # produce results with whole-network (stitched ip) rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx"
    )
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_whole_rtlsim = ret_whole_rtlsim[oname]
    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
    assert np.argmax(y_golden) == 3
def test_end2end_tfc_w1a2_verify_all():
    # use the streamlined model as the "golden" model for right answers
    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # x = np.zeros(ishape, dtype=np.float32)
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    # produce results with cppsim
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
    y_cppsim = ret_cppsim[oname]
    # produce results with node-by-node rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx"
    )
    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
    # produce results with whole-network (stitched ip) rtlsim
    sdp_node.set_nodeattr(
        "model", build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx"
    )
    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
    y_whole_rtlsim = ret_whole_rtlsim[oname]
    assert np.isclose(y_golden, y_cppsim).all()
    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
    assert np.isclose(y_golden, y_whole_rtlsim).all()
def test_end2end_tfc_w1a2_fold_and_tlastmarker():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
    config = [
        (16, 49, 16, 64, "block"),
        (8, 8, 64, 64, "auto"),
        (8, 8, 64, 64, "auto"),
        (10, 8, 64, 10, "distributed"),
    ]
    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
        fcl_inst = getCustomOp(fcl)
        fcl_inst.set_nodeattr("PE", pe)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("inFIFODepth", ififo)
        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
        fcl_inst.set_nodeattr("ram_style", ramstyle)

    model = model.transform(InsertDWC())
    model = model.transform(InsertFIFO())
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(AnnotateResources("estimate"))
    model.save(build_dir + "/end2end_tfc_w1a2_folded.onnx")
def test_brevitas_compare_exported_mobilenet():
    if "IMAGENET_VAL_PATH" not in os.environ.keys():
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    debug_mode = False
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    # export preprocessing
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    # export the actual MobileNet-v1
    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")

    with open(
        export_onnx_path + "/mobilenet_validation.csv", "w", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [
                "goldenID",
                "brevitasTop5",
                "brevitasTop5[%]",
                "finnTop5",
                "finnTop5[%]",
                "top5equal",
                "top5%equal",
            ]
        )
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for (img_path, target_id) in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # do forward pass in PyTorch/Brevitas
            input_tensor = preproc.forward(img_torch)
            expected = mobilenet.forward(input_tensor).detach().numpy()
            expected_topk = expected.flatten()
            expected_top5 = np.argsort(expected_topk)[-5:]
            expected_top5 = np.flip(expected_top5)
            expected_top5_prob = []
            for index in expected_top5:
                expected_top5_prob.append(expected_topk[index])
            idict = {model.graph.input[0].name: img_np}
            odict = oxe.execute_onnx(model, idict, return_full_exec_context=True)
            produced = odict[model.graph.output[0].name]
            produced_prob = odict["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            writer.writerow(
                [
                    str(target_id),
                    str(expected_top5),
                    str(expected_top5_prob),
                    str(produced.flatten()),
                    str(produced_prob.flatten()),
                    str(inds_ok),
                    str(probs_ok),
                ]
            )
            csvfile.flush()
            if ((not inds_ok) or (not probs_ok)) and debug_mode:
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                names_brevitas = set(dbg_hook.values.keys())
                names_finn = set(odict.keys())
                names_common = names_brevitas.intersection(names_finn)
                for dbg_name in names_common:
                    if not np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        odict[dbg_name],
                        atol=1e-3,
                    ).all():
                        print("Tensor %s differs between Brevitas and FINN" % dbg_name)
        assert all_inds_ok and all_probs_ok
def test_end2end_cybsec_mlp_export(QONNX_export):
    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
    # load up trained net in Brevitas
    input_size = 593
    hidden1 = 64
    hidden2 = 64
    hidden3 = 64
    weight_bit_width = 2
    act_bit_width = 2
    num_classes = 1
    model = nn.Sequential(
        QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden1),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden2),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
        nn.BatchNorm1d(hidden3),
        nn.Dropout(0.5),
        QuantReLU(bit_width=act_bit_width),
        QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width),
    )
    trained_state_dict = torch.load(assets_dir + "/state_dict.pth")[
        "models_state_dict"
    ][0]
    model.load_state_dict(trained_state_dict, strict=False)
    W_orig = model[0].weight.data.detach().numpy()
    # pad the second (593-sized) dimension with 7 zeroes at the end
    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
    model[0].weight.data = torch.from_numpy(W_new)
    model_for_export = CybSecMLPForExport(model)
    export_onnx_path = get_checkpoint_name("export", QONNX_export)
    input_shape = (1, 600)
    # create a QuantTensor instance to mark the input as bipolar during export
    # (note: randint(0, 1) always returns 0, so the traced input is constant -1)
    input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
    input_a = 2 * input_a - 1
    scale = 1.0
    input_t = torch.from_numpy(input_a * scale)
    input_qt = QuantTensor(
        input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
    )

    if QONNX_export:
        # with the BrevitasONNXManager we need to manually set
        # the FINN DataType at the input
        BrevitasONNXManager.export(
            model_for_export, input_shape, export_path=export_onnx_path
        )
        model = ModelWrapper(export_onnx_path)
        model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
        model.save(export_onnx_path)
        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(ConvertQONNXtoFINN())
        model.save(export_onnx_path)
    else:
        bo.export_finn_onnx(
            model_for_export, export_path=export_onnx_path, input_t=input_qt
        )

    assert os.path.isfile(export_onnx_path)
    # fix input datatype
    finn_model = ModelWrapper(export_onnx_path)
    finnonnx_in_tensor_name = finn_model.graph.input[0].name
    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
    # verify a few exported ops
    if QONNX_export:
        # The first "Mul" node doesn't exist in the QONNX export,
        # because the QuantTensor scale is not exported.
        # However, this node would have been unity scale anyway and
        # the models are still equivalent.
        assert finn_model.graph.node[0].op_type == "Add"
        assert finn_model.graph.node[1].op_type == "Div"
        assert finn_model.graph.node[2].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    else:
        assert finn_model.graph.node[0].op_type == "Mul"
        assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
        assert finn_model.graph.node[1].op_type == "Add"
        assert finn_model.graph.node[2].op_type == "Div"
        assert finn_model.graph.node[3].op_type == "MatMul"
        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
    # verify datatypes on some tensors
    assert (
        finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
    )
    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]
def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
    if (not QONNX_export) and QONNX_FINN_conversion:
        pytest.skip("This test configuration is not valid and is thus skipped.")
    finn_onnx = "test_brevitas_debug.onnx"
    fc = get_test_model_trained("TFC", 2, 2)
    ishape = (1, 1, 28, 28)
    if QONNX_export:
        dbg_hook = bo.enable_debug(fc, proxy_level=True)
        BrevitasONNXManager.export(fc, ishape, finn_onnx)
        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
        model = ModelWrapper(finn_onnx)
        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
        for dbg_node in dbg_nodes:
            dbg_node.domain = "finn.custom_op.general"
        model.save(finn_onnx)
        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
        if QONNX_FINN_conversion:
            model = ModelWrapper(finn_onnx)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(finn_onnx)
    else:
        dbg_hook = bo.enable_debug(fc)
        bo.export_finn_onnx(fc, ishape, finn_onnx)
        model = ModelWrapper(finn_onnx)
        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
        # TODO: we should probably have a transformation pass that does this
        # domain conversion for us
        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
        for dbg_node in dbg_nodes:
            dbg_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveStaticGraphInputs())
        model.save(finn_onnx)
    model = ModelWrapper(finn_onnx)
    assert len(model.graph.input) == 1
    assert len(model.graph.output) == 1
    # load one of the test vectors
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
    produced = output_dict[model.graph.output[0].name]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    # do forward pass in PyTorch/Brevitas
    expected = fc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    # check all tensors at debug markers
    names_brevitas = set(dbg_hook.values.keys())
    names_finn = set(output_dict.keys())
    names_common = names_brevitas.intersection(names_finn)
    # the different exports return debug markers in different numbers and places
    print(len(names_common))
    if QONNX_export and not QONNX_FINN_conversion:
        assert len(names_common) == 12
    elif QONNX_export and QONNX_FINN_conversion:
        assert len(names_common) == 8
    else:
        assert len(names_common) == 16
    for dbg_name in names_common:
        if QONNX_export:
            tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy()
        else:
            tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
        tensor_finn = output_dict[dbg_name]
        assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
    os.remove(finn_onnx)
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS;
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)
    x = gen_finn_dt_tensor(idt, input_tensor_shape)
    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    chw_mul = model.get_initializer(model.graph.node[-1].input[1])
    chw_mul = 1  # note: overrides the initializer value read above
    expected_sum = (
        chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (ifmdim * ifmdim)
    )
    assert (produced_sum.flatten() == expected_sum.flatten()).all()

    model = model.transform(InferDataLayouts())

    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it
    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())

    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())

    model = model.transform(SortGraph())

    # model.save("golden_hls.onnx")
    # check topology status
    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1

    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))

    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)

    os.remove(export_onnx_path)