def test_end2end_cnv_w1a1_verify_dataflow_part():
    """Compare cppsim against node-by-node and whole-network rtlsim.

    Loads the ip-stitched CNV-w1a1 checkpoint from ``build_dir``, runs it on
    an all-zero (1, 32, 32, 3) input in each execution mode, saves one
    checkpoint per mode and asserts that both rtlsim outputs are close to the
    cppsim output.
    """
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
    zeros_in = np.zeros((1, 32, 32, 3), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: zeros_in}

    # --- cppsim ---
    model = (
        model.transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    res_cppsim = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim, one node at a time ---
    model = model.transform(SetExecMode("rtlsim")).transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim of the whole (ip-stitched) network ---
    # selected through the model-level "exec_mode" metadata property
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")
    # particularly long-running simulation: lift the liveness threshold
    # (note: the environment variable is left set after the test)
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_end2end_cnv_w1a1_fold_and_tlastmarker():
    """Apply the folding config, then insert DWCs, FIFOs and a TLast marker.

    Reads the dataflow-model checkpoint from ``build_dir`` and writes the
    folded model back as ``end2end_cnv_w1a1_folded.onnx``.
    """
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")

    # one (PE, SIMD, in_fifo_depth) entry per StreamingFCLayer_Batch node
    folding = [
        (16, 3, 128),
        (32, 32, 128),
        (16, 32, 128),
        (16, 32, 128),
        (4, 32, 81),
        (1, 32, 2),
        (1, 4, 2),
        (1, 8, 128),
        (5, 1, 3),
    ]

    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    for fc_node, (pe, simd, in_depth) in zip(fc_layers, folding):
        fc_op = getCustomOp(fc_node)
        fc_op.set_nodeattr("PE", pe)
        fc_op.set_nodeattr("SIMD", simd)
        fc_op.set_nodeattr("inFIFODepth", in_depth)

    # the i-th sliding window generator takes the SIMD value of the i-th
    # folding entry
    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    for idx, swg_node in enumerate(swg_layers):
        getCustomOp(swg_node).set_nodeattr("SIMD", folding[idx][1])

    model = (
        model.transform(InsertDWC())
        .transform(InsertFIFO())
        .transform(InsertTLastMarker())
        .transform(GiveUniqueNodeNames())
        .transform(AnnotateResources("estimate"))
    )
    model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
def test_renaming():
    """Check node/tensor renaming on the mnist-conv model.

    Verifies the names produced by ``GiveUniqueNodeNames`` followed by
    ``GiveReadableTensorNames``, that applying both a second time yields the
    same names, and that the renamed model still reproduces the reference
    output.
    """

    def check_node_names(m):
        # (node index, op type, node name, name of second input)
        expectations = (
            (1, "Conv", "Conv_0", "Conv_0_param0"),
            (6, "Add", "Add_1", "Add_1_param0"),
        )
        for idx, op_type, node_name, param_name in expectations:
            assert m.graph.node[idx].op_type == op_type
            assert m.graph.node[idx].name == node_name
            assert m.graph.node[idx].input[1] == param_name

    # load and tidy the reference ONNX model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = (
        ModelWrapper(raw_m)
        .transform(InferShapes())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
    )

    # basic checks on the produced names
    assert model.graph.input[0].name == "global_in"
    assert model.graph.output[0].name == "global_out"
    check_node_names(model)

    # a second renaming pass must not change the names
    model = model.transform(GiveUniqueNodeNames()).transform(
        GiveReadableTensorNames()
    )
    check_node_names(model)

    # execute the renamed model to confirm the topology is intact
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)
    input_dict = {"global_in": np_helper.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    golden = np_helper.to_array(output_tensor)
    assert np.isclose(golden, output_dict["global_out"], atol=1e-3).all()
def test_xnorpopcountmatmul():
    """Exercise shape inference, datatype inference and execution of a
    single-node ``XnorPopcountMatMul`` graph with binary input and weights.
    """
    M, K, N = 1, 3, 3

    # graph: out = XnorPopcountMatMul(x, W); the output shape is left symbolic
    x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [M, K])
    W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [K, N])
    out = helper.make_tensor_value_info("out", TensorProto.FLOAT, ["x", "y"])
    node_def = helper.make_node(
        "XnorPopcountMatMul",
        ["x", "W"],
        ["out"],
        domain="finn.custom_op.general",
    )
    graph = helper.make_graph([node_def], "test_model", [x], [out], value_info=[W])
    model = ModelWrapper(helper.make_model(graph))
    model.set_tensor_datatype("x", DataType.BINARY)
    model.set_tensor_datatype("W", DataType.BINARY)
    W_data = np.asarray([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
    model.set_initializer("W", W_data)

    # shape inference resolves the output shape to [M, N]
    model = model.transform(InferShapes())
    assert model.get_tensor_shape("out") == [M, N]

    # datatype inference turns FLOAT32 into UINT32
    assert model.get_tensor_datatype("out") is DataType.FLOAT32
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("out") is DataType.UINT32

    # execution: compare against a matmul of the {0,1} -> {-1,+1} mapped data
    x_data = np.asarray([[1, 0, 0]], dtype=np.float32)
    out_dict = oxe.execute_onnx(model, {"x": x_data})
    Wb = 2 * W_data - 1
    xb = 2 * x_data - 1
    rb = np.matmul(xb, Wb)
    assert (2 * out_dict["out"] - K == rb).all()
def step_resnet50_streamline_linear(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply a fixed sequence of streamlining transformations to ``model``.

    Each transformation is followed by ``GiveUniqueNodeNames``. ``cfg`` is
    accepted for the build-step signature but is not used here. Returns the
    transformed model.
    """
    streamline_transformations = [
        # applied before MoveAddPastMul to avoid int->float
        AbsorbScalarMulAddIntoTopK(),
        ConvertSubToAdd(),
        ConvertDivToMul(),
        RemoveIdentityOps(),
        CollapseRepeatedMul(),
        BatchNormToAffine(),
        ConvertSignToThres(),
        MoveAddPastMul(),
        MoveScalarAddPastMatMul(),
        MoveAddPastConv(),
        MoveScalarMulPastMatMul(),
        MoveScalarMulPastConv(),
        MoveScalarLinearPastInvariants(),
        MoveAddPastMul(),
        CollapseRepeatedAdd(),
        CollapseRepeatedMul(),
        AbsorbAddIntoMultiThreshold(),
        FactorOutMulSignMagnitude(),
        MoveMaxPoolPastMultiThreshold(),
        AbsorbMulIntoMultiThreshold(),
        Absorb1BitMulIntoMatMul(),
        Absorb1BitMulIntoConv(),
        RoundAndClipThresholds(),
    ]
    for transformation in streamline_transformations:
        model = model.transform(transformation).transform(GiveUniqueNodeNames())
    return model
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model.

    Streamlining moves floating point scale/shift parameters around,
    collapses adjacent ones into a single parameter and then absorbs the
    scale/shift into the following `MultiThreshold` node. It requires careful
    topology design and cannot be applied to all topologies.

    If the streamlined model still contains `Conv` nodes, they are lowered to
    matrix multiplications and streamlining is run a second time. When the
    STREAMLINED_PYTHON verification step is enabled in ``cfg``, the result is
    verified before being returned.
    """
    model = model.transform(MoveScalarLinearPastInvariants()).transform(
        Streamline()
    )

    conv_nodes = model.get_nodes_by_op_type("Conv")
    if len(conv_nodes) > 0:
        # convolutions present: lower them and streamline once more
        model = (
            model.transform(LowerConvsToMatMul())
            .transform(MakeMaxPoolNHWC())
            .transform(absorb.AbsorbTransposeIntoMultiThreshold())
        )
    model = model.transform(ConvertBipolarMatMulToXnorPopcount()).transform(
        Streamline()
    )

    # absorb final add-mul nodes into TopK, then tidy up
    model = (
        model.transform(absorb.AbsorbScalarMulAddIntoTopK())
        .transform(InferDataLayouts())
        .transform(RemoveUnusedTensors())
    )

    enabled_steps = cfg._resolve_verification_steps()
    if VerificationStepType.STREAMLINED_PYTHON in enabled_steps:
        verify_step(model, cfg, "streamlined_python", need_parent=False)
    return model
def test_brevitas_debug():
    """Compare FINN execution against Brevitas, including debug markers.

    Exports a trained TFC-w2a2 network with debug hooks enabled, executes the
    exported model with FINN and checks both the final output and every
    tensor captured at a common debug marker against the PyTorch/Brevitas
    values. The exported ONNX file is removed afterwards, also when an
    assertion fails.
    """
    finn_onnx = "test_brevitas_debug.onnx"
    fc = get_test_model_trained("TFC", 2, 2)
    dbg_hook = bo.enable_debug(fc)
    try:
        bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
        model = ModelWrapper(finn_onnx)
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveStaticGraphInputs())
        assert len(model.graph.input) == 1
        assert len(model.graph.output) == 1
        # load one of the test vectors
        raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
        input_tensor = onnx.load_tensor_from_string(raw_i)
        # run using FINN-based execution, keeping the full execution context
        # so that intermediate (debug) tensors can be inspected
        input_dict = {"0": nph.to_array(input_tensor)}
        output_dict = oxe.execute_onnx(
            model, input_dict, return_full_exec_context=True
        )
        produced = output_dict[model.graph.output[0].name]
        # run using PyTorch/Brevitas
        input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
        assert input_tensor.shape == (1, 1, 28, 28)
        # do forward pass in PyTorch/Brevitas
        expected = fc.forward(input_tensor).detach().numpy()
        assert np.isclose(produced, expected, atol=1e-3).all()
        # check all tensors at debug markers
        names_brevitas = set(dbg_hook.values.keys())
        names_finn = set(output_dict.keys())
        names_common = names_brevitas.intersection(names_finn)
        assert len(names_common) == 16
        for dbg_name in names_common:
            tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
            tensor_finn = output_dict[dbg_name]
            assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
    finally:
        # always clean up the exported file; it may be missing if the export
        # itself failed
        if os.path.isfile(finn_onnx):
            os.remove(finn_onnx)
def test_streamline_fc(size, wbits, abits):
    """Check that ``Streamline`` preserves the output of an exported FC net.

    The network selected by ``size``/``wbits``/``abits`` is exported, tidied
    up and executed on an MNIST test vector before and after streamlining;
    both outputs must be close. Unsupported configurations are skipped.
    """
    if wbits == 2 and abits == 2 and size == "LFC":
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")

    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + ("/%s.onnx" % nname)
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)

    model = (
        ModelWrapper(finn_onnx)
        .transform(InferShapes())
        .transform(FoldConstants())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
    )

    # one of the packaged test vectors serves as input
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_dict = {"global_in": nph.to_array(input_tensor)}

    # reference result before streamlining
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # result after streamlining
    model = model.transform(Streamline())
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]

    assert np.isclose(expected, produced, atol=1e-3).all()
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
    """Export a trained FC network and compare FINN execution with Brevitas.

    Unsupported ``size``/``wbits``/``abits`` combinations are skipped.
    """
    if wbits == 2 and abits == 2 and size == "LFC":
        pytest.skip("No LFC-w2a2 present at the moment")
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")

    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + ("/%s.onnx" % nname)
    fc = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
    model = ModelWrapper(finn_onnx).transform(InferShapes()).transform(
        FoldConstants()
    )

    # one of the packaged test vectors serves as input
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_array = nph.to_array(onnx.load_tensor_from_string(raw_i))

    # FINN-based execution; the first entry of the returned dict is compared
    output_dict = oxe.execute_onnx(model, {"0": input_array})
    first_key = list(output_dict)[0]
    produced = output_dict[first_key]

    # forward pass in PyTorch/Brevitas on the same data
    input_tensor = torch.from_numpy(input_array).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    expected = fc.forward(input_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
def test_end2end_tfc_w1a2_verify_dataflow_part():
    """Compare npysim against node-by-node and whole-network rtlsim.

    Loads the ip-stitched TFC-w1a2 checkpoint from ``build_dir``, runs it on
    an all-zero (1, 784) input in each mode (with rtlsim tracing enabled),
    saves one checkpoint per mode and asserts that both rtlsim outputs are
    close to the npysim output.
    """
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    zeros_in = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: zeros_in}

    # --- npysim ---
    model = (
        model.transform(CodeGen_npysim())
        .transform(Compile())
        .transform(SetExecMode("npysim"))
    )
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_npysim.onnx")
    res_npysim = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim, one node at a time ---
    model = model.transform(SetExecMode("rtlsim"))
    # request a trace from each of the first four nodes
    for node_idx in range(4):
        getCustomOp(model.graph.node[node_idx]).set_nodeattr(
            "rtlsim_trace", "default"
        )
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim of the whole (ip-stitched) network ---
    # mode and trace file are set as model-level metadata properties
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_npysim, res_rtlsim_whole).all()
def test_move_chw_add_past_conv(idim, k, s, ich, och):
    """Check ``MoveAddPastConv`` on an Add -> Conv graph.

    Builds the two-node graph with random parameters, executes it before and
    after the transformation and asserts equal outputs as well as the new
    node order (Conv first, Add second).
    """
    odim = compute_conv_output_dim(idim, k, s)

    ishape = [1, ich, idim, idim]
    oshape = [1, och, odim, odim]
    add_param_shape = [1, ich, 1, 1]
    conv_param_shape = [och, ich, k, k]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, add_param_shape)
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, conv_param_shape)

    conv_config = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [k, k],
        "pads": [0, 0, 0, 0],
        "strides": [s, s],
    }

    add_node = helper.make_node("Add", ["inp", "a0"], ["add_out"])
    conv_node = helper.make_node("Conv", ["add_out", "a1"], ["outp"], **conv_config)

    graph = helper.make_graph(
        nodes=[add_node, conv_node],
        name="move-add-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1],
    )
    model = ModelWrapper(helper.make_model(graph))

    # random initializers for the Add and Conv parameters
    a0_values = np.random.uniform(low=0, high=1, size=tuple(add_param_shape))
    model.set_initializer("a0", a0_values.astype(np.float32))
    a1_values = np.random.uniform(low=0, high=1, size=tuple(conv_param_shape))
    model.set_initializer("a1", a1_values.astype(np.float32))

    model = model.transform(InferShapes())

    # reference execution before the transformation
    inp_values = np.random.uniform(low=0, high=1, size=tuple(ishape))
    idict = {model.graph.input[0].name: inp_values.astype(np.float32)}
    y_before = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    # execution after moving the Add past the Conv
    model = model.transform(MoveAddPastConv())
    y_after = oxe.execute_onnx(model, idict)[model.graph.output[0].name]

    assert np.isclose(y_before, y_after).all()
    assert model.graph.node[0].op_type == "Conv"
    assert model.graph.node[1].op_type == "Add"
def test_add_pre_and_postproc(self, topology, wbits, abits):
    """Attach ToTensor preprocessing and Top-1 postprocessing to the model.

    Starts from the "import_and_tidy" checkpoint (or skips if unavailable),
    exports torchvision's ``ToTensor`` as its own ONNX model, merges it in
    front of the core model, marks the global input as UINT8, appends a
    TopK(k=1) node, tidies up and saves the "pre_post" checkpoint.
    """
    prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
    model = load_test_checkpoint_or_skip(prev_chkpt_name)
    ishape = model.get_tensor_shape(model.graph.input[0].name)

    # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
    totensor_pyt = ToTensor()
    chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
    bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
    assert os.path.isfile(chkpt_preproc_name)

    # merge the preprocessing model in front of the core model
    pre_model = (
        ModelWrapper(chkpt_preproc_name)
        .transform(InferShapes())
        .transform(FoldConstants())
    )
    model = model.transform(MergeONNXModels(pre_model))

    # input quantization annotation: UINT8 for all BNN-PYNQ models
    # (the input name is looked up again after the merge)
    merged_inp_name = model.graph.input[0].name
    model.set_tensor_datatype(merged_inp_name, DataType.UINT8)

    # postprocessing: Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")

    # tidy-up again
    model = (
        model.transform(InferShapes())
        .transform(FoldConstants())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
        .transform(InferDataTypes())
        .transform(RemoveStaticGraphInputs())
    )
    model.save(chkpt_name)
    assert os.path.isfile(chkpt_name)
def test_collapse_repeated_op():
    """Check ``CollapseRepeatedAdd``/``CollapseRepeatedMul`` on an
    Add -> Add -> Mul -> Mul chain by comparing execution before and after.
    """

    def vec2_info(name):
        # every tensor in this graph is a float vector of length 2
        return oh.make_tensor_value_info(name, TensorProto.FLOAT, [2])

    top_in = vec2_info("top_in")
    add_param_0 = vec2_info("add_param_0")
    mul_param_0 = vec2_info("mul_param_0")
    add_param_1 = vec2_info("add_param_1")
    mul_param_1 = vec2_info("mul_param_1")
    top_out = vec2_info("top_out")

    nodes = [
        oh.make_node("Add", ["top_in", "add_param_0"], ["middle_0"]),
        oh.make_node("Add", ["middle_0", "add_param_1"], ["middle_1"]),
        oh.make_node("Mul", ["middle_1", "mul_param_0"], ["middle_2"]),
        oh.make_node("Mul", ["middle_2", "mul_param_1"], ["top_out"]),
    ]
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[add_param_0, mul_param_0, add_param_1, mul_param_1],
        nodes=nodes,
    )
    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())

    # parameter values, set in the same order as listed
    initializers = (
        ("add_param_0", [1, 3]),
        ("add_param_1", [-1, 3]),
        ("mul_param_0", [2, 4]),
        ("mul_param_1", [2, -4]),
    )
    for tensor_name, values in initializers:
        model.set_initializer(tensor_name, np.asarray(values, dtype=np.float32))

    new_model = model.transform(CollapseRepeatedAdd()).transform(
        CollapseRepeatedMul()
    )
    inp_dict = {"top_in": np.asarray([-1.0, 1.0], dtype=np.float32)}
    assert ox.compare_execution(model, new_model, inp_dict)
def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, pretrained):
    """Export an FC network via ``FINNManager`` and compare FINN execution
    with the PyTorch/Brevitas forward pass on a random input.

    Unsupported ``size``/``wbits``/``abits`` combinations are skipped.
    """
    if wbits == 2 and abits == 2 and size == "LFC":
        pytest.skip(f"No LFC_{MAX_WBITS}W{MAX_ABITS}A present.")
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")

    nname = f"{size}_{wbits}W{abits}A"
    finn_onnx = nname + ".onnx"
    fc, _ = model_with_cfg(nname.lower(), pretrained=pretrained)
    FINNManager.export_onnx(fc, FC_INPUT_SIZE, finn_onnx)

    model = (
        ModelWrapper(finn_onnx)
        .transform(GiveUniqueNodeNames())
        .transform(DoubleToSingleFloat())
        .transform(InferShapes())
        .transform(FoldConstants())
        .transform(RemoveStaticGraphInputs())
    )

    # random test vector within [MIN_INP_VAL, MAX_INP_VAL)
    random_input = np.random.uniform(MIN_INP_VAL, MAX_INP_VAL, size=FC_INPUT_SIZE)
    input_array = random_input.astype(np.float32)

    # FINN-based execution; the first entry of the returned dict is compared
    output_dict = oxe.execute_onnx(model, {"0": input_array})
    first_key = list(output_dict)[0]
    produced = output_dict[first_key]

    # forward pass in PyTorch/Brevitas on the same data
    input_tensor = torch.from_numpy(input_array).float()
    expected = fc.forward(input_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=ATOL).all()
def test_brevitas_cnv_export_exec(wbits, abits):
    """Export a trained CNV network and compare FINN execution with Brevitas.

    Uses the packaged CIFAR-10 class-3 sample (scaled by 1/255) as input and
    additionally checks that the predicted class is 3. The exported ONNX file
    is removed afterwards, also when an assertion fails.
    """
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    cnv = get_test_model_trained("CNV", wbits, abits)
    try:
        bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveStaticGraphInputs())
        assert len(model.graph.input) == 1
        assert len(model.graph.output) == 1
        fn = pk.resource_filename(
            "finn", "data/cifar10/cifar10-test-data-class3.npz"
        )
        input_tensor = np.load(fn)["arr_0"].astype(np.float32)
        input_tensor = input_tensor / 255
        assert input_tensor.shape == (1, 3, 32, 32)
        # run using FINN-based execution
        input_dict = {model.graph.input[0].name: input_tensor}
        output_dict = oxe.execute_onnx(model, input_dict, True)
        produced = output_dict[model.graph.output[0].name]
        # do forward pass in PyTorch/Brevitas
        input_tensor = torch.from_numpy(input_tensor).float()
        expected = cnv.forward(input_tensor).detach().numpy()
        assert np.isclose(produced, expected, atol=1e-3).all()
        assert np.argmax(produced) == 3
    finally:
        # always clean up the exported file; it may be missing if the export
        # itself failed
        if os.path.isfile(export_onnx_path):
            os.remove(export_onnx_path)
def test_onnx_exec_internal_rounding():
    """Check that execution yields exact integer results for slightly
    perturbed integer inputs.

    A single ``Mul`` node with INT2-annotated inputs is fed values that are
    integers multiplied and then divided by almost the same scale (so they
    are marginally off the integer grid). The produced output must equal the
    product computed from the exact integer input.
    """
    inp0 = onnx.helper.make_tensor_value_info("inp0", onnx.TensorProto.FLOAT, [2, 2])
    inp1 = onnx.helper.make_tensor_value_info("inp1", onnx.TensorProto.FLOAT, [1])
    outp = onnx.helper.make_tensor_value_info("outp", onnx.TensorProto.FLOAT, [2, 2])
    mul_node = onnx.helper.make_node("Mul", inputs=["inp0", "inp1"], outputs=["outp"])
    graph = onnx.helper.make_graph(
        nodes=[mul_node], name="mul_graph", inputs=[inp0, inp1], outputs=[outp]
    )
    model = onnx.helper.make_model(graph, producer_name="mul-model")
    model = ModelWrapper(model)
    idt = DataType.INT2
    model.set_tensor_datatype("inp0", idt)
    model.set_tensor_datatype("inp1", idt)
    # transform() returns the transformed model, so keep the result instead
    # of discarding it
    model = model.transform(InferShapes())

    mul_value = np.asarray([-1], dtype=np.float32)
    inp_int = gen_finn_dt_tensor(idt, [2, 2])
    scale = np.random.uniform(low=0, high=1, size=(2, 2)).astype(np.float32)
    # scaling by `scale` and dividing by `scale + 1e-7` leaves the values
    # marginally different from the original integers
    inp_rounded = (inp_int * scale) / (scale + 1e-7)
    input_dict = {"inp0": inp_rounded, "inp1": mul_value}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict["outp"]
    expected = np.multiply(inp_int, mul_value)
    assert (produced == expected).all()
def make_dupstreams_modelwrapper(ch, pe, idim, idt):
    """Build a single-node ``DuplicateStreams_Batch`` model.

    The graph has one input and two outputs, all of shape
    ``[1, idim, idim, ch]``. ``ch`` and ``pe`` become the node's
    ``NumChannels`` and ``PE`` attributes and ``idt`` is used as the input
    datatype. Returns the ``ModelWrapper`` after shape and datatype
    inference.
    """
    tensor_shape = [1, idim, idim, ch]

    def float_info(name):
        # all graph-level tensors share the same shape
        return helper.make_tensor_value_info(name, TensorProto.FLOAT, tensor_shape)

    inp = float_info("inp")
    outp0 = float_info("outp0")
    outp1 = float_info("outp1")

    node_attrs = {
        "domain": "finn.custom_op.fpgadataflow",
        "backend": "fpgadataflow",
        "NumChannels": ch,
        "PE": pe,
        "inputDataType": idt.name,
        "numInputVectors": [1, idim, idim],
    }
    dupstrm_node = helper.make_node(
        "DuplicateStreams_Batch", ["inp"], ["outp0", "outp1"], **node_attrs
    )
    graph = helper.make_graph(
        nodes=[dupstrm_node], name="graph", inputs=[inp], outputs=[outp0, outp1]
    )

    model = ModelWrapper(helper.make_model(graph, producer_name="addstreams-model"))
    model.set_tensor_datatype("inp", idt)
    return model.transform(InferShapes()).transform(InferDataTypes())
def test_move_scalar_add_past_matmul():
    """Check ``MoveScalarAddPastMatMul`` on an Add -> MatMul graph.

    Execution before and after the transformation must match, and the
    transformed graph must be MatMul followed by Add, with the Add consuming
    the MatMul output.
    """

    def float_info(name, shape):
        return oh.make_tensor_value_info(name, TensorProto.FLOAT, shape)

    top_in = float_info("top_in", [1, 2])
    add_param = float_info("add_param", [1, 1])
    matmul_param = float_info("matmul_param", [2, 2])
    top_out = float_info("top_out", [1, 2])

    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=[add_param, matmul_param],
        nodes=[
            oh.make_node("Add", ["top_in", "add_param"], ["middle"]),
            oh.make_node("MatMul", ["middle", "matmul_param"], ["top_out"]),
        ],
    )
    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())
    model.set_initializer("add_param", np.asarray([[3]], dtype=np.float32))
    matmul_weights = np.asarray([[2, 4], [-1, 1]], dtype=np.float32)
    model.set_initializer("matmul_param", matmul_weights)

    new_model = model.transform(MoveScalarAddPastMatMul())
    inp_dict = {"top_in": np.asarray([[-1.0, 1.0]], dtype=np.float32)}
    assert ox.compare_execution(model, new_model, inp_dict)

    # the transformed graph is MatMul -> Add, directly connected
    first, second = new_model.graph.node[0], new_model.graph.node[1]
    assert first.op_type == "MatMul"
    assert second.op_type == "Add"
    assert first.output[0] == second.input[0]
def make_lookup_model(embeddings, ishape, idt, edt):
    """Export an ``nn.Embedding`` lookup and wrap it as a ``ModelWrapper``.

    The embedding table dimensions are taken from ``embeddings.shape``. After
    export with an all-zero int64 input of shape ``ishape``, the first input
    of the first node is overwritten with ``embeddings`` and annotated with
    ``edt``, and the graph input is annotated with ``idt``. Returns the model
    after shape and datatype inference.
    """

    class LookupModel(nn.Module):
        def __init__(self, num_embeddings, embedding_dim):
            super().__init__()
            self.lookup = nn.Embedding(
                num_embeddings=num_embeddings, embedding_dim=embedding_dim
            )

        def forward(self, x):
            return self.lookup(x)

    n_rows, n_cols = embeddings.shape
    torch_model = LookupModel(n_rows, n_cols)
    dummy_input = torch.zeros(ishape, dtype=torch.int64)
    exported = FINNManager.export(torch_model, input_t=dummy_input, opset_version=11)
    model = ModelWrapper(exported)

    iname = model.graph.input[0].name
    # first input of the first node: the tensor that is replaced by `embeddings`
    ename = model.graph.node[0].input[0]
    model.set_tensor_datatype(iname, idt)

    # the exported table must have the same shape as the replacement values
    assert tuple(model.get_tensor_shape(ename)) == embeddings.shape
    model.set_initializer(ename, embeddings)
    model.set_tensor_datatype(ename, edt)

    return model.transform(InferShapes()).transform(InferDataTypes())
def test_quartznet_asr_4b(pretrained):
    """Export the 4-bit per-channel-scaled QuartzNet and compare FINN
    execution with the PyTorch/Brevitas forward pass on a random input.
    """
    finn_onnx = "quant_quartznet_perchannelscaling_4b.onnx"
    quartznet = quant_quartznet_perchannelscaling_4b(pretrained, export_mode=True)
    quartznet.eval()
    FINNManager.export(quartznet, QUARTZNET_POSTPROCESSED_INPUT_SIZE, finn_onnx)

    model = (
        ModelWrapper(finn_onnx)
        .transform(GiveUniqueNodeNames())
        .transform(DoubleToSingleFloat())
        .transform(InferShapes())
        .transform(FoldConstants())
        .transform(RemoveStaticGraphInputs())
    )

    # random test vector within [MIN_INP_VAL, MAX_INP_VAL)
    random_input = np.random.uniform(
        MIN_INP_VAL, MAX_INP_VAL, size=QUARTZNET_POSTPROCESSED_INPUT_SIZE
    )
    input_array = random_input.astype(np.float32)

    # FINN-based execution; the first entry of the returned dict is compared
    output_dict = oxe.execute_onnx(model, {"0": input_array})
    first_key = list(output_dict)[0]
    produced = output_dict[first_key]

    # forward pass in PyTorch/Brevitas on the same data
    input_tensor = torch.from_numpy(input_array).float()
    expected = quartznet(input_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=ATOL).all()
def test_end2end_tfc_w1a2_verify_dataflow_part():
    """Compare cppsim against node-by-node and whole-network rtlsim.

    Loads the ip-stitched TFC-w1a2 checkpoint from ``build_dir``, runs it on
    an all-zero (1, 784) input in each execution mode, saves one checkpoint
    per mode and asserts that both rtlsim outputs are close to the cppsim
    output.
    """
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    zeros_in = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: zeros_in}

    # --- cppsim ---
    model = (
        model.transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    res_cppsim = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim, one node at a time ---
    model = model.transform(SetExecMode("rtlsim")).transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    res_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)[out_name]

    # --- rtlsim of the whole (ip-stitched) network ---
    # selected through the model-level "exec_mode" metadata property
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    res_rtlsim_whole = execute_onnx(model, inp_dict, True)[out_name]

    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_brevitas_cnv_onnx_export_and_exec(wbits, abits, pretrained):
    """Export a CNV network with a quantized input tensor and compare FINN
    execution with the PyTorch/Brevitas forward pass.

    The exported model is fed the raw integer-valued input, while the
    PyTorch model receives the same data scaled by 1/255.
    """
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")

    nname = f"CNV_{wbits}W{abits}A"
    finn_onnx = nname + ".onnx"
    cnv, _ = model_with_cfg(nname.lower(), pretrained=pretrained)
    cnv.eval()

    # random integer test vector, stored as float32
    random_ints = np.random.randint(MIN_INP_VAL, MAX_INP_VAL, size=CNV_INPUT_SIZE)
    input_a = random_ints.astype(np.float32)
    scale = 1. / 255
    input_t = torch.from_numpy(input_a * scale)
    # export with an unsigned 8-bit quantized input description
    input_qt = QuantTensor(
        input_t,
        scale=torch.tensor(scale),
        bit_width=torch.tensor(8.0),
        signed=False,
    )
    FINNManager.export(cnv, export_path=finn_onnx, input_t=input_qt)

    model = (
        ModelWrapper(finn_onnx)
        .transform(GiveUniqueNodeNames())
        .transform(DoubleToSingleFloat())
        .transform(InferShapes())
        .transform(FoldConstants())
        .transform(RemoveStaticGraphInputs())
    )

    # FINN-based execution; the first entry of the returned dict is compared
    output_dict = oxe.execute_onnx(model, {"0": input_a})
    first_key = list(output_dict)[0]
    produced = output_dict[first_key]

    # forward pass in PyTorch/Brevitas on the scaled input
    expected = cnv(input_t).detach().numpy()

    assert np.isclose(produced, expected, atol=ATOL).all()
def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
    """Export a Brevitas ``QuantConv2d`` and compare FINN execution with the
    PyTorch forward pass.

    Parameters:
        dw: if True, build a depthwise 3x3 convolution (groups equal to
            ``in_channels``); otherwise a 1x1 convolution with 64 outputs.
        bias: forwarded to ``QuantConv2d``.
        in_channels: number of input channels; the input and weight shapes
            are derived from it.
        QONNX_export: if True, export through ``BrevitasONNXManager`` and
            convert QONNX to FINN; otherwise use ``bo.export_finn_onnx``.

    The exported ONNX file is removed afterwards, also when an assertion
    fails.
    """
    ishape = (1, in_channels, 111, 111)
    if dw is True:
        groups = in_channels
        out_channels = in_channels
        kernel_size = 3
        padding = 1
    else:
        groups = 1
        out_channels = 64
        kernel_size = 1
        padding = 0
    stride = 1
    # conv weight layout: (out_channels, in_channels / groups, kH, kW);
    # gives (32, 1, 3, 3) resp. (64, 32, 1, 1) for in_channels == 32
    w_shape = (out_channels, in_channels // groups, kernel_size, kernel_size)

    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    # replace the weights with random INT4 values
    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
    b_conv.eval()

    try:
        if QONNX_export:
            m_path = export_onnx_path
            BrevitasONNXManager.export(b_conv, ishape, m_path)
            qonnx_cleanup(m_path, out_file=m_path)
            model = ModelWrapper(m_path)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(m_path)
        else:
            bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(
            np.float32
        )
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]
        inp_tensor = torch.from_numpy(inp_tensor).float()
        expected = b_conv.forward(inp_tensor).detach().numpy()
        assert np.isclose(produced, expected, atol=1e-3).all()
    finally:
        # always clean up the exported file; it may be missing if the export
        # itself failed
        if os.path.isfile(export_onnx_path):
            os.remove(export_onnx_path)
def test_end2end_cnv_w1a1_convert_to_hls_layers():
    """Convert the streamlined CNV-w1a1 checkpoint to HLS layers.

    Reads ``end2end_cnv_w1a1_streamlined.onnx`` from ``build_dir``, applies
    the HLS inference transformations (FC layers use the module-level
    ``mem_mode``) and saves ``end2end_cnv_w1a1_hls_layers.onnx``.
    """
    streamlined = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
    hls_model = (
        streamlined.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
        .transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
        .transform(to_hls.InferConvInpGen())
        .transform(to_hls.InferStreamingMaxPool())
        .transform(MoveReshape())
    )
    hls_model.save(build_dir + "/end2end_cnv_w1a1_hls_layers.onnx")
def test_end2end_tfc_w1a2_import_and_tidy():
    """Load the exported TFC-w1a2 model, run the tidy-up transformations and
    save the result as ``end2end_tfc_w1a2_tidy.onnx`` in ``build_dir``.
    """
    exported = ModelWrapper(build_dir + "/end2end_tfc_w1a2_export.onnx")
    tidy_model = (
        exported.transform(InferShapes())
        .transform(FoldConstants())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
        .transform(InferDataTypes())
    )
    tidy_model.save(build_dir + "/end2end_tfc_w1a2_tidy.onnx")
def test_end2end_tfc_w1a1_convert_to_hls_layers():
    """Convert the streamlined TFC-w1a1 checkpoint to HLS layers.

    Reads ``end2end_tfc_w1a1_streamlined.onnx`` from ``build_dir``, converts
    bipolar matmuls to XNOR-popcount, absorbs Add/Mul into the multithreshold
    nodes, rounds and clips thresholds, infers binary streaming FC layers
    and saves ``end2end_tfc_w1a1_hls_layers.onnx``.
    """
    streamlined = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
    hls_model = (
        streamlined.transform(ConvertBipolarMatMulToXnorPopcount())
        .transform(absorb.AbsorbAddIntoMultiThreshold())
        .transform(absorb.AbsorbMulIntoMultiThreshold())
        .transform(RoundAndClipThresholds())
        .transform(to_hls.InferBinaryStreamingFCLayer())
    )
    hls_model.save(build_dir + "/end2end_tfc_w1a1_hls_layers.onnx")
def test_end2end_cnv_w1a1_import_and_tidy():
    """Load the exported CNV-w1a1 model, run the tidy-up transformations and
    save the result as ``end2end_cnv_w1a1_tidy.onnx`` in ``build_dir``.
    """
    exported = ModelWrapper(build_dir + "/end2end_cnv_w1a1_export.onnx")
    tidy_model = (
        exported.transform(DoubleToSingleFloat())
        .transform(InferShapes())
        .transform(FoldConstants())
        .transform(GiveUniqueNodeNames())
        .transform(GiveReadableTensorNames())
    )
    tidy_model.save(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
def step_resnet50_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Insert FIFOs and set their depths, then regenerate IP for new nodes.

    Behaviour depends on ``cfg.auto_fifo_depths``:

    * True: run `InsertAndSetFIFODepths`, which tries to find FIFO sizes
      that give full throughput. This involves stitched-IP rtlsim and may
      take a long time.
    * False: assume the folding config file also contains FIFO sizes. Run
      `InsertDWC` and `InsertFIFO`, rename nodes and tensors so that names
      agree with the config file, apply `cfg.folding_config_file` (if set)
      and finally run `RemoveShallowFIFOs`.

    In both cases the final hardware attributes are written to
    ``final_hw_config.json`` in ``cfg.output_dir`` and `PrepareIP`,
    `HLSSynthIP` and `ReplaceVerilogRelPaths` are run before the model is
    returned.
    """
    if cfg.auto_fifo_depths:
        fifo_sizing = InsertAndSetFIFODepths(
            cfg._resolve_fpga_part(),
            cfg._resolve_hls_clk_period(),
            vivado_ram_style=cfg.large_fifo_mem_style.value,
        )
        model = model.transform(fifo_sizing)
    else:
        # folding cfg json is assumed to contain FIFO sizes too:
        # insert DWCs and FIFOs, then run ApplyConfig once more
        model = model.transform(InsertDWC())
        # create_shallow_fifos=True so that every FIFO exists and can have
        # its depth set by ApplyConfig
        model = (
            model.transform(InsertFIFO(create_shallow_fifos=True))
            .transform(GiveUniqueNodeNames())
            .transform(GiveReadableTensorNames())
        )
        if cfg.folding_config_file is not None:
            model = model.transform(ApplyConfig(cfg.folding_config_file))
        # drop any FIFOs that remained shallow
        model = model.transform(RemoveShallowFIFOs())

    # extract the final configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    config_path = cfg.output_dir + "/final_hw_config.json"
    extract_model_config_to_json(model, config_path, hw_attrs)

    # with the FIFOs in place, call PrepareIP and HLSSynthIP again;
    # this will only run for the new nodes (e.g. FIFOs and DWCs)
    ip_preparation = PrepareIP(
        cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()
    )
    model = (
        model.transform(ip_preparation)
        .transform(HLSSynthIP())
        .transform(ReplaceVerilogRelPaths())
    )
    return model
def test_conv_lowering_conv_1x1():
    """Check ``LowerConvsToMatMul`` on a single 1x1 convolution.

    The lowered model must execute identically to the original and consist
    of exactly three nodes: Transpose, MatMul, Transpose.
    """
    np.random.seed(0)

    in_feature_dim_h, in_feature_dim_w = 7, 7
    in_chn = 3
    kernel_size = 1
    # a 1x1 convolution without padding keeps the spatial dimensions
    out_feature_dim_h = in_feature_dim_h
    out_feature_dim_w = in_feature_dim_w

    input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w]
    output_shape = [1, in_chn, out_feature_dim_h, out_feature_dim_w]
    conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size]

    conv_config = {
        "dilations": [1, 1],
        "group": 1,
        "kernel_shape": [kernel_size, kernel_size],
        "pads": [0, 0, 0, 0],
        "strides": [1, 1],
    }

    top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [
        oh.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    ]

    conv_node = oh.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
    graph = oh.make_graph(
        name="test",
        inputs=[top_in],
        outputs=[top_out],
        value_info=value_info,
        nodes=[conv_node],
    )
    model = ModelWrapper(oh.make_model(graph)).transform(InferShapes())
    weights = np.random.rand(*conv_param_shape).astype(np.float32)
    model.set_initializer("p1", weights)

    new_model = model.transform(LowerConvsToMatMul())
    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}

    assert oxe.compare_execution(model, new_model, inp_dict)
    assert new_model.graph.node[0].op_type == "Transpose"
    assert new_model.graph.node[1].op_type == "MatMul"
    assert new_model.graph.node[2].op_type == "Transpose"
    assert len(new_model.graph.node) == 3
def test_const_folding_shapes():
    """Check node type and tensor shapes after constant folding.

    Exports an untrained LFC-w1a1 network, runs shape inference and constant
    folding, then asserts that the first node is a Reshape and that tensors
    "0" and "27" have shapes [1, 1, 28, 28] and [1, 784]. The exported ONNX
    file is removed afterwards, also when an assertion fails.
    """
    lfc = get_test_model_untrained("LFC", 1, 1)
    try:
        bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        model = model.transform(FoldConstants())
        assert model.graph.node[0].op_type == "Reshape"
        assert list(model.get_tensor_shape("0")) == [1, 1, 28, 28]
        assert list(model.get_tensor_shape("27")) == [1, 784]
    finally:
        # always clean up the exported file; it may be missing if the export
        # itself failed
        if os.path.isfile(export_onnx_path):
            os.remove(export_onnx_path)