def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, pretrained):
    """Export a Brevitas FC model to FINN-ONNX and check that FINN-based
    execution matches the Brevitas forward pass on a random input."""
    if size == "LFC" and wbits == 2 and abits == 2:
        pytest.skip(f"No LFC_{MAX_WBITS}W{MAX_ABITS}A present.")
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")
    cfg_name = f"{size}_{wbits}W{abits}A"
    export_path = cfg_name + ".onnx"
    fc, _ = model_with_cfg(cfg_name.lower(), pretrained=pretrained)
    FINNManager.export_onnx(fc, FC_INPUT_SIZE, export_path)
    model = ModelWrapper(export_path)
    # standard tidy-up pipeline after import
    for trafo in (
        GiveUniqueNodeNames(),
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        RemoveStaticGraphInputs(),
    ):
        model = model.transform(trafo)
    # draw a random test vector
    np_input = np.random.uniform(
        MIN_INP_VAL, MAX_INP_VAL, size=FC_INPUT_SIZE
    ).astype(np.float32)
    # FINN-based execution
    out_ctx = oxe.execute_onnx(model, {"0": np_input})
    produced = out_ctx[list(out_ctx.keys())[0]]
    # reference forward pass in PyTorch/Brevitas
    expected = fc.forward(torch.from_numpy(np_input).float()).detach().numpy()
    assert np.isclose(produced, expected, atol=ATOL).all()
def test_end2end_mobilenet_streamline():
    """Streamline the tidied MobileNet model, save it, and verify that only
    the final quantized bias Add remains and no Mul ops survive."""
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_tidy.onnx")
    model = model.transform(Streamline())
    # MobileNet-specific follow-up transformations, then readability passes
    followup_steps = (
        DoubleToSingleFloat(),
        reorder.MoveMulPastDWConv(),
        absorb.AbsorbMulIntoMultiThreshold(),
        ChangeDataLayoutQuantAvgPool2d(),
        InferDataLayouts(),
        reorder.MoveTransposePastScalarMul(),
        absorb.AbsorbTransposeIntoFlatten(),
        reorder.MoveFlattenPastAffine(),
        reorder.MoveFlattenPastTopK(),
        reorder.MoveScalarMulPastMatMul(),
        CollapseRepeatedMul(),
        RemoveIdentityOps(),
        RoundAndClipThresholds(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
    )
    for step in followup_steps:
        model = model.transform(step)
    model.save(build_dir + "/end2end_mobilenet_streamlined.onnx")
    # only final quantized bias Add op remains
    assert len(model.get_nodes_by_op_type("Add")) == 1
    # no Mul ops remain
    assert len(model.get_nodes_by_op_type("Mul")) == 0
def test_quartznet_asr_4b(pretrained):
    """Export the 4-bit QuartzNet ASR model to FINN-ONNX and compare
    FINN-based execution against the Brevitas forward pass."""
    export_path = "quant_quartznet_perchannelscaling_4b.onnx"
    quartznet = quant_quartznet_perchannelscaling_4b(pretrained, export_mode=True)
    quartznet.eval()
    FINNManager.export(quartznet, QUARTZNET_POSTPROCESSED_INPUT_SIZE, export_path)
    model = ModelWrapper(export_path)
    # standard tidy-up pipeline after import
    for trafo in (
        GiveUniqueNodeNames(),
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        RemoveStaticGraphInputs(),
    ):
        model = model.transform(trafo)
    # draw a random test vector
    np_input = np.random.uniform(
        MIN_INP_VAL, MAX_INP_VAL, size=QUARTZNET_POSTPROCESSED_INPUT_SIZE
    ).astype(np.float32)
    # FINN-based execution
    out_ctx = oxe.execute_onnx(model, {"0": np_input})
    produced = out_ctx[list(out_ctx.keys())[0]]
    # reference forward pass in PyTorch/Brevitas
    expected = quartznet(torch.from_numpy(np_input).float()).detach().numpy()
    assert np.isclose(produced, expected, atol=ATOL).all()
def transform(
    self, transformation, make_deepcopy=True, cleanup=True, fix_float64=True
):
    """Applies given Transformation repeatedly until no more changes can be made
    and returns a transformed ModelWrapper instance.

    - make_deepcopy : operates on a new (deep)copy of model.
    - fix_float64 : DoubleToSingleFloat correction before starting
    - cleanup : execute cleanup transformations before returning
    """
    # optionally leave the original model untouched
    model = copy.deepcopy(self) if make_deepcopy else self
    if fix_float64:
        # the changed-flag from this pre-pass is irrelevant; the main loop
        # below always runs at least once regardless
        model, _ = DoubleToSingleFloat().apply(model)
    # apply until the transformation reports a fixed point
    changed = True
    while changed:
        model, changed = transformation.apply(model)
    if cleanup:
        model.cleanup()
    return model
def test_streamline_cnv(size, wbits, abits):
    """Streamline a trained CNV model and verify its output on a CIFAR-10
    class-3 test vector is unchanged and still classified correctly."""
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    nname = "%s_%dW%dA" % (size, wbits, abits)
    finn_onnx = export_onnx_path + "/%s.onnx" % nname
    cnv = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), finn_onnx)
    model = ModelWrapper(finn_onnx)
    for trafo in (
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    ):
        model = model.transform(trafo)
    # one of the CIFAR-10 test vectors (class 3), rescaled to [0, 1]
    data_file = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    img = np.load(data_file)["arr_0"].astype(np.float32) / 255
    assert img.shape == (1, 3, 32, 32)
    input_dict = {"global_in": img}
    # reference output of the un-streamlined graph
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    model = model.transform(Streamline())
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert model.graph.node[0].op_type == "MultiThreshold"
    assert np.argmax(produced) == 3
def test_brevitas_cnv_export_exec(wbits, abits):
    """Export a trained CNV model and check FINN-based execution matches the
    Brevitas forward pass on a CIFAR-10 class-3 test vector."""
    if wbits > abits:
        pytest.skip("No wbits > abits cases at the moment")
    cnv = get_test_model_trained("CNV", wbits, abits)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    for trafo in (
        GiveUniqueNodeNames(),
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
    ):
        model = model.transform(trafo)
    # CIFAR-10 class-3 test vector, rescaled to [0, 1]
    data_file = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    img = np.load(data_file)["arr_0"].astype(np.float32) / 255
    assert img.shape == (1, 3, 32, 32)
    # FINN-based execution
    out_ctx = oxe.execute_onnx(model, {model.graph.input[0].name: img}, True)
    produced = out_ctx[model.graph.output[0].name]
    # reference forward pass in PyTorch/Brevitas
    expected = cnv.forward(torch.from_numpy(img).float()).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def test_brevitas_cnv_onnx_export_and_exec(wbits, abits, pretrained):
    """Export a quantized CNV model via a QuantTensor input and compare
    FINN-based execution against the Brevitas forward pass."""
    if wbits > abits:
        pytest.skip("No wbits > abits cases.")
    cfg_name = f"CNV_{wbits}W{abits}A"
    export_path = cfg_name + ".onnx"
    cnv, _ = model_with_cfg(cfg_name.lower(), pretrained=pretrained)
    cnv.eval()
    # random integer test image; scaled copy is fed to Brevitas, raw to FINN
    raw = np.random.randint(MIN_INP_VAL, MAX_INP_VAL, size=CNV_INPUT_SIZE).astype(np.float32)
    scale = 1. / 255
    scaled_t = torch.from_numpy(raw * scale)
    quant_in = QuantTensor(
        scaled_t,
        scale=torch.tensor(scale),
        bit_width=torch.tensor(8.0),
        signed=False,
    )
    FINNManager.export(cnv, export_path=export_path, input_t=quant_in)
    model = ModelWrapper(export_path)
    for trafo in (
        GiveUniqueNodeNames(),
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        RemoveStaticGraphInputs(),
    ):
        model = model.transform(trafo)
    # FINN-based execution on the unscaled input
    out_ctx = oxe.execute_onnx(model, {"0": raw})
    produced = out_ctx[list(out_ctx.keys())[0]]
    # reference forward pass in PyTorch/Brevitas on the scaled tensor
    expected = cnv(scaled_t).detach().numpy()
    assert np.isclose(produced, expected, atol=ATOL).all()
def test_end2end_cnv_w1a1_import_and_tidy():
    """Import the exported CNV-w1a1 model, run the tidy-up pipeline, and
    save the result for the next end2end step."""
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_export.onnx")
    tidy_steps = (
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    )
    for step in tidy_steps:
        model = model.transform(step)
    model.save(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Alternate linear/nonlinear streamlining passes four times, then run
    the big-loop tidy-up transformations and return the model."""
    for _ in range(4):
        model = step_resnet50_streamline_linear(model, cfg)
        model = step_resnet50_streamline_nonlinear(model, cfg)
    # big loop tidy up
    for tidy in (
        RemoveUnusedTensors(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        SortGraph(),
        DoubleToSingleFloat(),
    ):
        model = model.transform(tidy)
    return model
def test_conv_lowering_cnv_w1a1():
    """Check that LowerConvsToMatMul preserves CNV-w1a1 outputs on a
    CIFAR-10 class-3 test vector."""
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    for trafo in (DoubleToSingleFloat(), InferShapes(), FoldConstants()):
        model = model.transform(trafo)
    # CIFAR-10 class-3 test vector, rescaled to [0, 1]
    data_file = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    img = np.load(data_file)["arr_0"].astype(np.float32) / 255
    assert img.shape == (1, 3, 32, 32)
    input_dict = {"0": img}
    # expected answer from the imported model
    ctx_before = oxe.execute_onnx(model, input_dict)
    expected = ctx_before[list(ctx_before.keys())[0]]
    # answer after lowering convolutions to matmul
    model = model.transform(LowerConvsToMatMul())
    ctx_after = oxe.execute_onnx(model, input_dict)
    produced = ctx_after[list(ctx_after.keys())[0]]
    assert np.isclose(produced, expected).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def step_mobilenet_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run the generic Streamline pass followed by MobileNet-specific
    follow-up transformations and readability passes; returns the model."""
    model = model.transform(Streamline())
    followup_steps = (
        DoubleToSingleFloat(),
        reorder.MoveMulPastDWConv(),
        absorb.AbsorbMulIntoMultiThreshold(),
        ChangeDataLayoutQuantAvgPool2d(),
        InferDataLayouts(),
        reorder.MoveTransposePastScalarMul(),
        absorb.AbsorbTransposeIntoFlatten(),
        reorder.MoveFlattenPastAffine(),
        reorder.MoveFlattenPastTopK(),
        reorder.MoveScalarMulPastMatMul(),
        CollapseRepeatedMul(),
        RemoveIdentityOps(),
        RoundAndClipThresholds(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
    )
    for step in followup_steps:
        model = model.transform(step)
    return model
def test_mobilenet_v1_4b(pretrained):
    """Export the 4-bit MobileNet-v1 to FINN-ONNX and compare FINN-based
    execution against the Brevitas forward pass on a seeded random input."""
    export_path = "mobilenet_v1_4b.onnx"
    mobilenet = quant_mobilenet_v1_4b(pretrained)
    mobilenet.eval()
    # deterministic random test vector
    np.random.seed(SEED)
    np_input = np.random.random(size=INPUT_SIZE).astype(np.float32)
    # reference forward pass in PyTorch/Brevitas
    expected = mobilenet(torch.from_numpy(np_input).float()).detach().numpy()
    export_finn_onnx(mobilenet, INPUT_SIZE, export_path)
    model = ModelWrapper(export_path)
    for trafo in (
        GiveUniqueNodeNames(),
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        RemoveStaticGraphInputs(),
    ):
        model = model.transform(trafo)
    # FINN-based execution
    out_ctx = oxe.execute_onnx(model, {model.graph.input[0].name: np_input})
    produced = out_ctx[list(out_ctx.keys())[0]]
    assert np.isclose(produced, expected, atol=ATOL).all()
def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert the streamlined ResNet-50 graph into HLS-backed FINN layers.

    Tries the experimental double-packed DSP convolution first and falls
    back to the standard (non-packed) path when finn-experimental is not
    installed. Returns the converted model.
    """
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())
    try:
        # optional finn-experimental feature; absence is expected and handled
        from finn.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv
        model = model.transform(InferDoublePackedConv([1]))
    except ImportError:
        # fix: was a bare `except:`, which also swallowed KeyboardInterrupt,
        # SystemExit, and real errors raised inside InferDoublePackedConv
        # while printing a misleading "not available" message
        print(" FINN Experimental not available. Using non-packed convolution ")
    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())
    # HLS inference passes, applied in dependency order
    to_hls_transformations = [
        to_hls.InferAddStreamsLayer,
        LowerConvsToMatMul,
        to_hls.InferChannelwiseLinearLayer,
        to_hls.InferPool_Batch,
        AbsorbTransposeIntoMultiThreshold,
        RoundAndClipThresholds,
        to_hls.InferQuantizedStreamingFCLayer,
        to_hls.InferThresholdingLayer,
        AbsorbConsecutiveTransposes,
        to_hls.InferConvInpGen,
        to_hls.InferDuplicateStreamsLayer,
        to_hls.InferLabelSelectLayer,
    ]
    for trn in to_hls_transformations:
        model = model.transform(trn())
    # final tidy-up after conversion
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())
    return model
def test_batchnorm_to_affine_cnv_w1a1():
    """BatchNormToAffine must remove every BatchNormalization node without
    changing the CNV-w1a1 outputs on a CIFAR-10 class-3 test vector."""
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    for trafo in (DoubleToSingleFloat(), InferShapes(), FoldConstants()):
        model = model.transform(trafo)
    # CIFAR-10 class-3 test vector, rescaled to [0, 1]
    data_file = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    img = np.load(data_file)["arr_0"].astype(np.float32) / 255
    assert img.shape == (1, 3, 32, 32)
    input_dict = {"0": img}
    ctx_before = oxe.execute_onnx(model, input_dict)
    expected = ctx_before[list(ctx_before.keys())[0]]
    new_model = model.transform(BatchNormToAffine())
    # no BatchNormalization nodes may survive the transformation
    assert "BatchNormalization" not in [n.op_type for n in new_model.graph.node]
    ctx_after = oxe.execute_onnx(new_model, input_dict)
    produced = ctx_after[list(ctx_after.keys())[0]]
    assert np.isclose(expected, produced).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path)
def test_convert_to_hls_layers_cnv_w1a1():
    """Convert a streamlined CNV-w1a1 model to HLS layers, check the
    resulting topology, and verify cppsim output matches the pre-HLS net."""
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    # tidy-up followed by the streamlining/lowering sequence
    for trafo in (
        DoubleToSingleFloat(),
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        Streamline(),
        LowerConvsToMatMul(),
        MakeMaxPoolNHWC(),
        absorb.AbsorbTransposeIntoMultiThreshold(),
        ConvertBipolarMatMulToXnorPopcount(),
        Streamline(),
    ):
        model = model.transform(trafo)
    # load one of the test vectors (class 3), rescaled to [0, 1]
    data_file = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
    img = np.load(data_file)["arr_0"].astype(np.float32) / 255
    assert img.shape == (1, 3, 32, 32)
    # generate expected value from the streamlined (pre-HLS) net
    input_dict = {"global_in": img}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type != "StreamingFCLayer_Batch":
            continue
        inst = getCustomOp(node)
        inst.set_nodeattr("mem_mode", "decoupled")
        mw = inst.get_nodeattr("MW")
        mh = inst.get_nodeattr("MH")
        # fold by 4/16 only where the matrix dimensions divide evenly
        inst.set_nodeattr("PE", mh // 4 if mh % 4 == 0 else mh)
        inst.set_nodeattr("SIMD", mw // 16 if mw % 16 == 0 else mw)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 18
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    assert len(model.get_nodes_by_op_type("StreamingFCLayer_Batch")) == 9
    assert len(model.get_nodes_by_op_type("ConvolutionInputGenerator")) == 6
    assert len(model.get_nodes_by_op_type("StreamingMaxPool_Batch")) == 2
    # compile and execute via cppsim, then compare against the reference
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)