def test_change_datalayout_quantavgpool(s, k, ibits, obits, signed, c, idim):
    """Check ChangeDataLayoutQuantAvgPool2d: it should wrap the QuantAvgPool2d
    node in a pair of Transpose nodes and flip its data_layout attribute from
    NCHW to NHWC, without changing execution results."""
    batch = 1
    odim = compute_pool_output_dim(idim, k, s)
    # determine input FINN datatype from signedness and bit width
    dtype = DataType[("INT" if signed is True else "UINT") + str(ibits)]
    inp = helper.make_tensor_value_info(
        "inp", TensorProto.FLOAT, [batch, c, idim, idim]
    )
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [batch, c, odim, odim]
    )
    node = helper.make_node(
        "QuantAvgPool2d",
        ["inp"],
        ["outp"],
        domain="finn",
        stride=s,
        kernel=k,
        ibits=ibits,
        obits=obits,
        signed=signed,
        data_layout="NCHW",
    )
    graph = helper.make_graph(
        nodes=[node], name="single-quantavgpool", inputs=[inp], outputs=[outp]
    )
    model = ModelWrapper(helper.make_model(graph))

    def _tidy(m):
        # shared tidy-up pipeline applied before and after the transformation
        for trn in (
            InferShapes(),
            InferDataTypes(),
            InferDataLayouts(),
            GiveUniqueNodeNames(),
            GiveReadableTensorNames(),
        ):
            m = m.transform(trn)
        return m

    model = _tidy(model)
    model_transformed = _tidy(model.transform(ChangeDataLayoutQuantAvgPool2d()))
    inp_values = gen_finn_dt_tensor(dtype, [batch, c, idim, idim])
    idict = {"inp": inp_values}
    assert oxe.compare_execution(model, model_transformed, idict)
    # transformation adds exactly two Transpose nodes (to NHWC and back)
    assert len(model.graph.node) + 2 == len(model_transformed.graph.node)
    assert model_transformed.graph.node[-1].op_type == "Transpose"
    assert model_transformed.graph.node[0].op_type == "Transpose"
    # check if QuantAvgPool2d node has datalayout set correctly
    qap_node = model_transformed.graph.node[1]
    d_layout = get_by_name(qap_node.attribute, "data_layout").s.decode("UTF-8")
    assert d_layout == "NHWC"
    assert model_transformed.get_tensor_layout(qap_node.input[0]) == DataLayout.NHWC
    assert model_transformed.get_tensor_layout(qap_node.output[0]) == DataLayout.NHWC
def test_infer_data_layouts():
    """Run InferDataLayouts on the MNIST conv model, before and after conv
    lowering, and check the annotated layout of selected tensors."""
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    for trn in (
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataLayouts(),
    ):
        model = model.transform(trn)
    # before lowering: conv part is NCHW, fully-connected tail is NC
    expected = {
        "global_in": DataLayout.NCHW,
        "Conv_0_out0": DataLayout.NCHW,
        "MaxPool_0_out0": DataLayout.NCHW,
        "Reshape_0_out0": DataLayout.NC,
        "MatMul_0_out0": DataLayout.NC,
        "global_out": DataLayout.NC,
    }
    for tname, layout in expected.items():
        assert model.get_tensor_layout(tname) == layout
    for trn in (
        LowerConvsToMatMul(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataLayouts(),
    ):
        model = model.transform(trn)
    # after lowering: interior of the lowered conv becomes NHWC
    expected_lowered = {
        "global_in": DataLayout.NCHW,
        "Transpose_0_out0": DataLayout.NHWC,
        "Im2Col_0_out0": DataLayout.NHWC,
        "MatMul_0_out0": DataLayout.NHWC,
        "MaxPool_0_out0": DataLayout.NCHW,
        "Reshape_0_out0": DataLayout.NC,
        "MatMul_2_out0": DataLayout.NC,
        "global_out": DataLayout.NC,
    }
    for tname, layout in expected_lowered.items():
        assert model.get_tensor_layout(tname) == layout
def step_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert eligible nodes to `HLSCustomOp` subclasses that represent HLS
    layers. Which nodes and particular configurations can be converted to HLS
    is limited, see the source code of the `convert_to_hls` module for more."""
    mem_mode = cfg.default_mem_mode.value
    if cfg.standalone_thresholds:
        # doing this first causes all threshold layers to be standalone
        model = model.transform(to_hls.InferThresholdingLayer())
    for trn in (
        # needed for bipolar MatMul layers
        to_hls.InferBinaryStreamingFCLayer(mem_mode),
        # needed for non-bipolar MatMul layers
        to_hls.InferQuantizedStreamingFCLayer(mem_mode),
        # TopK to LabelSelect
        to_hls.InferLabelSelectLayer(),
        # input quantization (if any) as standalone threshold
        to_hls.InferThresholdingLayer(),
    ):
        model = model.transform(trn)
    # needed for convolutions -- TODO always exec?
    if len(model.get_nodes_by_op_type("Im2Col")) > 0:
        model = model.transform(to_hls.InferConvInpGen())
        model = model.transform(to_hls.InferStreamingMaxPool())
        model = model.transform(RemoveCNVtoFCFlatten())
    # get rid of Transpose -> Transpose identity seq
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferDataLayouts())
    return model
def test_end2end_mobilenet_streamline():
    """Streamline the tidied MobileNet model and verify that only the final
    quantized bias Add remains and all Mul ops have been absorbed."""
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_tidy.onnx")
    model = model.transform(Streamline())
    extra_transforms = (
        DoubleToSingleFloat(),
        reorder.MoveMulPastDWConv(),
        absorb.AbsorbMulIntoMultiThreshold(),
        ChangeDataLayoutQuantAvgPool2d(),
        InferDataLayouts(),
        reorder.MoveTransposePastScalarMul(),
        absorb.AbsorbTransposeIntoFlatten(),
        reorder.MoveFlattenPastAffine(),
        reorder.MoveFlattenPastTopK(),
        reorder.MoveScalarMulPastMatMul(),
        CollapseRepeatedMul(),
        RemoveIdentityOps(),
        RoundAndClipThresholds(),
        # tidy-up after streamlining
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
    )
    for transform in extra_transforms:
        model = model.transform(transform)
    model.save(build_dir + "/end2end_mobilenet_streamlined.onnx")
    # only final quantized bias Add op remains
    assert len(model.get_nodes_by_op_type("Add")) == 1
    # no Mul ops remain
    assert len(model.get_nodes_by_op_type("Mul")) == 0
def test_end2end_mobilenet_folding():
    """Apply hand-picked PE/SIMD folding factors to the MobileNet HLS layers
    and save the folded model as a checkpoint.

    Relies on module-level globals: `build_dir`, `extra_fold` and
    `first_layer_res_type` (set elsewhere in the test module).
    """
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_hls_layers.onnx")
    # optional extra folding to use fewer resources
    # applied while setting the attributes on each node
    assert extra_fold in [1, 2, 4]
    # set up folding for the fully-connected layers impl'd by StreamingFCLayer_Batch
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # each tuple is (PE, SIMD, ram_style) for a layer
    folding = [
        (32, 3, "block"),
        (16, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (32, 16, "block"),
        (16, 16, "block"),
        (32, 16, "block"),
        (4, 4, "block"),
    ]
    for fcl, (pe, simd, ramstyle) in zip(fc_layers, folding):
        fcl_inst = getCustomOp(fcl)
        # only PE is divided by extra_fold; SIMD stays fixed
        fcl_inst.set_nodeattr("PE", pe // extra_fold)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("ram_style", ramstyle)
    # first layer uses 8-bit weights & activations
    # control its compute resource type explicitly
    getCustomOp(fc_layers[0]).set_nodeattr("resType", first_layer_res_type)
    # set up folding for the depthwise conv layers impl'd by VVAUs
    # each value is PE for a layer
    vvau_layers = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")
    folding = [32, 32, 64, 16, 32, 8, 16, 16, 16, 16, 16, 4, 8]
    for vvau, pe in zip(vvau_layers, folding):
        vvau_inst = getCustomOp(vvau)
        vvau_inst.set_nodeattr("PE", pe // extra_fold)
        # set SIMD in preceeding ConvInputGen to same value
        convinputgen = model.find_direct_predecessors(vvau)[0]
        convinputgen_inst = getCustomOp(convinputgen)
        convinputgen_inst.set_nodeattr("SIMD", pe // extra_fold)
        # set SIMD in preceeding FMPadding to same value
        # (only some VVAU layers have an FMPadding_Batch predecessor)
        padding = model.find_direct_predecessors(convinputgen)[0]
        if padding.op_type == "FMPadding_Batch":
            padding_inst = getCustomOp(padding)
            padding_inst.set_nodeattr("SIMD", pe // extra_fold)
    # adjust final pooling layer + its inpgen
    pool_node = model.get_nodes_by_op_type("Pool_Batch")[0]
    pool_inst = getCustomOp(pool_node)
    pool_inst.set_nodeattr("PE", 4 // extra_fold)
    pool_inpgen = model.find_direct_predecessors(pool_node)[0]
    pool_inpgen_inst = getCustomOp(pool_inpgen)
    pool_inpgen_inst.set_nodeattr("SIMD", 4 // extra_fold)
    model = model.transform(InferDataLayouts())
    model.save(build_dir + "/end2end_mobilenet_folded.onnx")
def test_convert_to_hls_layers(self, topology, wbits, abits):
    """Convert eligible nodes of the streamlined model to HLS layer ops and
    save a new checkpoint."""
    model = load_test_checkpoint_or_skip(
        get_checkpoint_name(topology, wbits, abits, "streamline")
    )
    if topology == "tfc" and wbits == 1 and abits == 1:
        # use standalone thresholds for tfc-w1a1 to also exercise that option
        model = model.transform(to_hls.InferThresholdingLayer())
    hls_transforms = [
        # needed for bipolar MatMul layers
        to_hls.InferBinaryStreamingFCLayer(mem_mode),
        # needed for non-bipolar MatMul layers
        to_hls.InferQuantizedStreamingFCLayer(mem_mode),
        # TopK to LabelSelect
        to_hls.InferLabelSelectLayer(),
        # input quantization (if any) to standalone thresholding
        to_hls.InferThresholdingLayer(),
    ]
    if "fc" not in topology:
        # needed for convolutions
        hls_transforms += [
            to_hls.InferConvInpGen(),
            to_hls.InferStreamingMaxPool(),
            RemoveCNVtoFCFlatten(),
        ]
    hls_transforms += [
        # get rid of Transpose -> Transpose identity seq
        absorb.AbsorbConsecutiveTransposes(),
        GiveUniqueNodeNames(),
        InferDataLayouts(),
    ]
    for trn in hls_transforms:
        model = model.transform(trn)
    model.save(get_checkpoint_name(topology, wbits, abits, "convert_to_hls_layers"))
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model. Streamlining involves moving floating
    point scale/shift parameters around, collapsing adjacent ones into a single
    parameter, then absorbing the scale/shift into the following
    `MultiThreshold` node. Streamlining requires careful topology design and
    cannot be applied to all topologies.
    """
    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    if len(model.get_nodes_by_op_type("Conv")) > 0:
        # conv lowering introduces transposes; push them into MultiThresholds
        for trn in (
            LowerConvsToMatMul(),
            MakeMaxPoolNHWC(),
            absorb.AbsorbTransposeIntoMultiThreshold(),
            MakeMaxPoolNHWC(),
        ):
            model = model.transform(trn)
    for trn in (
        ConvertBipolarMatMulToXnorPopcount(),
        Streamline(),
        # absorb final add-mul nodes into TopK
        absorb.AbsorbScalarMulAddIntoTopK(),
        InferDataLayouts(),
        RemoveUnusedTensors(),
    ):
        model = model.transform(trn)
    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "streamlined_python", need_parent=False)
    return model
def test_move_flatten_past_affine(data_layout, batch_size):
    """Check MoveFlattenPastAffine: for NHWC input the Flatten node is moved
    past the MatMul/Mul/Add chain; for NCHW the graph must stay untouched.
    Execution results must be identical either way."""
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
        oshape = [batch_size, 1000]
    else:
        ishape = [batch_size, 1024, 1, 1]
        oshape = [batch_size, 1000]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    # BUGFIX: the value_info tensor names must match the node inputs and
    # initializers ("a0", "a1", "a2"); they were previously declared as
    # "a1"/"a2"/"a3", which left "a0" undeclared and attached the wrong
    # declared shapes to "a1" and "a2".
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [1024, 1000])
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [])
    a2 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, [1000])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    flatten_node = helper.make_node("Flatten", ["inp"], ["flatten_out"])
    matmul_node = helper.make_node("MatMul", ["flatten_out", "a0"], ["matmul_out"])
    mul_node = helper.make_node("Mul", ["matmul_out", "a1"], ["mul_out"])
    add_node = helper.make_node("Add", ["mul_out", "a2"], ["outp"])
    graph = helper.make_graph(
        nodes=[flatten_node, matmul_node, mul_node, add_node],
        name="move-reshape-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1, a2],
    )
    model = helper.make_model(graph, producer_name="move_reshape_model")
    model = ModelWrapper(model)
    # initialize values
    a0_values = gen_finn_dt_tensor(DataType["TERNARY"], [1024, 1000])
    model.set_initializer("a0", a0_values)
    a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
    model.set_initializer("a1", a1_values)
    a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32)
    model.set_initializer("a2", a2_values)
    model.set_tensor_datatype("inp", DataType["INT2"])
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastAffine())
    assert oxe.compare_execution(model, model_transformed, idict)
    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
def apply(self, model):
    """Move a scalar Mul past a preceding Transpose, i.e. rewrite
    Transpose -> Mul into Mul -> Transpose.

    Only applies when the Transpose is neither a fork nor a join node, its
    single consumer is a non-join Mul with a constant scalar parameter, and
    data layouts are annotated on the tensors involved. Returns the
    (possibly transformed) model and a flag indicating whether the graph
    was modified.
    """
    graph = model.graph
    node_ind = 0
    graph_modified = False
    for n in graph.node:
        node_ind += 1
        if (
            n.op_type == "Transpose"
            and not model.is_fork_node(n)
            and not model.is_join_node(n)
        ):
            consumer = model.find_consumer(n.output[0])
            if (
                consumer is not None
                and consumer.op_type == "Mul"
                and not model.is_join_node(consumer)
            ):
                mul_weight_name = consumer.input[1]
                A = model.get_initializer(mul_weight_name)
                if A is None:
                    warnings.warn("Mul param is not constant, skipping")
                    continue
                transp_node = n
                mul_node = consumer
                # tensor names along the chain: start -> (Transpose) ->
                # middle -> (Mul) -> end
                start_name = transp_node.input[0]
                middle_name = transp_node.output[0]
                end_name = mul_node.output[0]
                # shapes/layouts are captured before rewiring so they can be
                # reassigned to the swapped tensors afterwards
                transp_in_shape = model.get_tensor_shape(start_name)
                transp_out_shape = model.get_tensor_shape(middle_name)
                transp_in_layout = model.get_tensor_layout(start_name)
                transp_out_layout = model.get_tensor_layout(middle_name)
                if transp_in_layout is None or transp_out_layout is None:
                    warnings.warn(
                        """Datalayout is not set for tensors. Transformation can't be applied."""
                    )
                    continue
                if all(x == 1 for x in A.shape):
                    # if the mul is scalar, we can simply swap the order of ops
                    # rewire transpose input to be mul input
                    mul_node.input[0] = start_name
                    model.set_tensor_shape(start_name, transp_in_shape)
                    model.set_tensor_layout(start_name, transp_in_layout)
                    # middle tensor now carries the (untransposed) input
                    # shape/layout, since the Mul runs before the Transpose
                    mul_node.output[0] = middle_name
                    model.set_tensor_shape(middle_name, transp_in_shape)
                    model.set_tensor_layout(middle_name, transp_in_layout)
                    transp_node.input[0] = middle_name
                    transp_node.output[0] = end_name
                    model.set_tensor_shape(end_name, transp_out_shape)
                    model.set_tensor_layout(end_name, transp_out_layout)
                    # re-insert the Transpose after the Mul to keep
                    # topological order
                    graph.node.remove(transp_node)
                    graph.node.insert(node_ind, transp_node)
                    graph_modified = True
    if graph_modified is True:
        # re-derive layouts/shapes for the rewired tensors
        model = model.transform(InferDataLayouts())
        model = model.transform(InferShapes())
    return (model, graph_modified)
def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Lower Conv nodes to Im2Col + MatMul, absorb the resulting transposes
    into MultiThreshold nodes, then tidy up names, datatypes, thresholds and
    layouts."""
    for trn in (
        LowerConvsToMatMul(),
        absorb.AbsorbTransposeIntoMultiThreshold(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
        RoundAndClipThresholds(),
        InferDataLayouts(),
    ):
        model = model.transform(trn)
    return model
def apply(self, model): _check_vitis_envvars() # first infer layouts model = model.transform(InferDataLayouts()) # prepare at global level, then break up into kernels prep_transforms = [ MakePYNQDriver(platform="alveo"), InsertIODMA(512), InsertDWC(), ] for trn in prep_transforms: model = model.transform(trn) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Floorplan(floorplan=self.floorplan_file)) model = model.transform(CreateDataflowPartition()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) # Build each kernel individually sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition") for sdp_node in sdp_nodes: sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") kernel_model = ModelWrapper(dataflow_model_filename) kernel_model = kernel_model.transform(InsertFIFO()) kernel_model = kernel_model.transform( InsertTLastMarker(both=True, external=False, dynamic=False)) kernel_model = kernel_model.transform(GiveUniqueNodeNames()) kernel_model.save(dataflow_model_filename) kernel_model = kernel_model.transform( PrepareIP(self.fpga_part, self.period_ns)) kernel_model = kernel_model.transform(HLSSynthIP()) kernel_model = kernel_model.transform( CreateStitchedIP(self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True)) kernel_model = kernel_model.transform( CreateVitisXO(sdp_node.onnx_node.name)) kernel_model.set_metadata_prop("platform", "alveo") kernel_model.save(dataflow_model_filename) # Assemble design from kernels model = model.transform( VitisLink( self.platform, round(1000 / self.period_ns), strategy=self.strategy, enable_debug=self.enable_debug, )) # set platform attribute for correct remote execution model.set_metadata_prop("platform", "alveo") return (model, False)
def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Convert the ResNet-50 model's eligible nodes to HLS layers.

    Tries the FINN Experimental double-packed DSP convolution first; if that
    optional package is not installed, falls back to the regular (non-packed)
    conversion path. Then applies the full list of to-HLS transformations and
    tidies up names, datatypes and graph order.
    """
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())
    try:
        from finn.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv
        model = model.transform(InferDoublePackedConv([1]))
    # BUGFIX: was a bare `except:`, which also swallowed SystemExit /
    # KeyboardInterrupt and hid genuine transform failures; only a missing
    # optional dependency should trigger the fallback.
    except ImportError:
        print(
            " FINN Experimental not available. Using non-packed convolution ")
    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())
    to_hls_transformations = [
        to_hls.InferAddStreamsLayer, LowerConvsToMatMul,
        to_hls.InferChannelwiseLinearLayer, to_hls.InferPool_Batch,
        AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds,
        to_hls.InferQuantizedStreamingFCLayer, to_hls.InferThresholdingLayer,
        AbsorbConsecutiveTransposes, to_hls.InferConvInpGen,
        to_hls.InferDuplicateStreamsLayer, to_hls.InferLabelSelectLayer
    ]
    for trn in to_hls_transformations:
        model = model.transform(trn())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())
    return model
def test_fpgadataflow_ipstitch_iodma_floorplan():
    """Insert IODMAs, run Floorplan and check the assigned partition ids of
    the resulting nodes."""
    model = create_one_fc_model()
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
    for trn in (InferDataLayouts(), InsertIODMA(), Floorplan()):
        model = model.transform(trn)
    # expected partition id per node position after floorplanning
    expected_partition_ids = {0: 0, 1: 2, 2: 1}
    for node_idx, part_id in expected_partition_ids.items():
        node_inst = getCustomOp(model.graph.node[node_idx])
        assert node_inst.get_nodeattr("partition_id") == part_id
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_iodma_floorplan.onnx")
def streamline(model, binary=True):
    """Apply the streamlining transformations to `model`, save the result as
    checkpoint "3_streamlined" and return it.

    NOTE(review): the `binary` flag is not referenced in this body -- confirm
    whether it is intentionally unused.
    """
    log("Streamline transformations launched")
    transforms = (
        MoveScalarLinearPastInvariants(),
        Streamline(),
        # Absorb add and mul in thresholds
        absorb.AbsorbAddIntoMultiThreshold(),
        absorb.AbsorbMulIntoMultiThreshold(),
        # Absorb add-mul in top-k
        absorb.AbsorbScalarMulAddIntoTopK(),
        RoundAndClipThresholds(),
        # Tidy-up
        InferDataLayouts(),
        RemoveUnusedTensors(),
    )
    for trn in transforms:
        model = model.transform(trn)
    log("Streamline transformations completed")
    save(model, "3_streamlined")
    return model
def test_streamline(self, topology, wbits, abits):
    """Streamline the pre/post-processed model and save a checkpoint."""
    model = load_test_checkpoint_or_skip(
        get_checkpoint_name(topology, wbits, abits, "pre_post")
    )
    # move past any reshapes to be able to streamline input scaling
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(Streamline())
    if "fc" not in topology:
        # conv topologies need lowering + transpose absorption first
        for trn in (
            LowerConvsToMatMul(),
            MakeMaxPoolNHWC(),
            absorb.AbsorbTransposeIntoMultiThreshold(),
        ):
            model = model.transform(trn)
    for trn in (
        ConvertBipolarMatMulToXnorPopcount(),
        Streamline(),
        # absorb final add-mul nodes into TopK
        absorb.AbsorbScalarMulAddIntoTopK(),
        InferDataLayouts(),
        RemoveUnusedTensors(),
    ):
        model = model.transform(trn)
    model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
def test_move_flatten_past_affine(data_layout, batch_size):
    """Exercise MoveFlattenPastTopK: with NHWC layout the Flatten node is
    moved past the TopK inserted by InsertTopK; with NCHW the graph stays
    unchanged. Execution results must match either way.

    NOTE(review): the function name says "affine" but the transformation under
    test is MoveFlattenPastTopK; it also duplicates the name of the affine
    test -- consider renaming.
    """
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
    else:
        ishape = [batch_size, 1024, 1, 1]
    oshape = [batch_size, 1024]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    graph = helper.make_graph(
        nodes=[helper.make_node("Flatten", ["inp"], ["outp"])],
        name="move-flatten-graph",
        inputs=[inp],
        outputs=[outp],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="move_flatten_model"))
    model.set_tensor_datatype("inp", DataType.INT2)
    model.set_tensor_layout("inp", data_layout)
    for trn in (
        InsertTopK(),
        InferShapes(),
        InferDataTypes(),
        InferDataLayouts(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    ):
        model = model.transform(trn)
    # compare execution before and after transformation
    idict = {model.graph.input[0].name: gen_finn_dt_tensor(DataType.INT2, ishape)}
    model_transformed = model.transform(MoveFlattenPastTopK())
    assert oxe.compare_execution(model, model_transformed, idict)
    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
def apply(self, model):
    """Build a Zynq (zynq-iodma) design from the given dataflow model.

    Prepares the model at global level (IODMA/DWC insertion, floorplanning,
    dataflow partitioning), synthesizes each StreamingDataflowPartition
    kernel to a stitched IP, assembles the Vivado project and generates the
    PYNQ driver. Returns (model, False) so the transformation runs once.
    """
    # first infer layouts
    model = model.transform(InferDataLayouts())
    # prepare at global level, then break up into kernels
    prep_transforms = [
        InsertIODMA(64),
        InsertDWC(),
        Floorplan(),
        CreateDataflowPartition(),
    ]
    for trn in prep_transforms:
        model = model.transform(trn)
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
    # Build each kernel individually
    sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
    for sdp_node in sdp_nodes:
        # prefix kernel-internal node names with the partition name
        prefix = sdp_node.name + "_"
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        kernel_model = ModelWrapper(dataflow_model_filename)
        kernel_model = kernel_model.transform(InsertFIFO())
        kernel_model = kernel_model.transform(GiveUniqueNodeNames(prefix))
        # save before HLS synthesis so node attrs are persisted
        kernel_model.save(dataflow_model_filename)
        kernel_model = kernel_model.transform(
            PrepareIP(self.fpga_part, self.period_ns))
        kernel_model = kernel_model.transform(HLSSynthIP())
        kernel_model = kernel_model.transform(
            CreateStitchedIP(self.fpga_part, self.period_ns,
                             sdp_node.onnx_node.name, True))
        kernel_model.set_metadata_prop("platform", "zynq-iodma")
        kernel_model.save(dataflow_model_filename)
    # Assemble design from IPs
    model = model.transform(
        MakeZYNQProject(self.platform, enable_debug=self.enable_debug))
    # set platform attribute for correct remote execution
    model.set_metadata_prop("platform", "zynq-iodma")
    # create driver
    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
    return (model, False)
def test_end2end_mobilenet_tidy_and_merge_with_preproc():
    """Tidy up the exported MobileNet model, record the TopK scale factor,
    and merge the preprocessing model in front of it."""
    preproc_model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_mobilenet_preproc.onnx"
    )
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_export.onnx")
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    a0 = model.get_initializer(model.graph.node[-2].input[1])
    np.save(build_dir + "/end2end_mobilenet_topk_scale.npy", a0)
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    for trn in (
        InferShapes(),
        InferDataTypes(),
        InferDataLayouts(),
        GiveUniqueNodeNames(),
        GiveUniqueParameterTensors(),
        GiveReadableTensorNames(),
    ):
        model = model.transform(trn)
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(build_dir + "/end2end_mobilenet_tidy.onnx")
def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape):
    """Build a single-node ModelWrapper for `onnx_op_name` with input "inp",
    parameter tensor "p0" (random data of FINN datatype `pdt`) and output
    "outp" of the same shape as the input.

    NOTE(review): despite the name, this builds whatever op `onnx_op_name`
    specifies, not necessarily a MaxPool -- confirm against callers.
    """
    op_node = helper.make_node(onnx_op_name, ["inp", "p0"], ["outp"])
    graph = helper.make_graph(
        name="test",
        inputs=[helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)],
        outputs=[helper.make_tensor_value_info("outp", TensorProto.FLOAT, ishape)],
        value_info=[helper.make_tensor_value_info("p0", TensorProto.FLOAT, pshape)],
        nodes=[op_node],
    )
    model = ModelWrapper(helper.make_model(graph))
    model.set_initializer("p0", gen_finn_dt_tensor(pdt, pshape))
    model.set_tensor_datatype("inp", idt)
    # in-place transforms (make_deepcopy=False), so return values are ignored
    model.transform(InferDataLayouts(), make_deepcopy=False)
    model.transform(InferShapes(), make_deepcopy=False)
    return model
def step_mobilenet_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Streamline the MobileNet model: base Streamline pass followed by the
    MobileNet-specific reorder/absorb/collapse transformations and tidy-up."""
    model = model.transform(Streamline())
    extra_transforms = (
        DoubleToSingleFloat(),
        reorder.MoveMulPastDWConv(),
        absorb.AbsorbMulIntoMultiThreshold(),
        ChangeDataLayoutQuantAvgPool2d(),
        InferDataLayouts(),
        reorder.MoveTransposePastScalarMul(),
        absorb.AbsorbTransposeIntoFlatten(),
        reorder.MoveFlattenPastAffine(),
        reorder.MoveFlattenPastTopK(),
        reorder.MoveScalarMulPastMatMul(),
        CollapseRepeatedMul(),
        RemoveIdentityOps(),
        RoundAndClipThresholds(),
        # tidy-up after streamlining
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        InferDataTypes(),
    )
    for transform in extra_transforms:
        model = model.transform(transform)
    return model
def test_merge_onnx_models():
    """MergeONNXModels: merge the MNIST conv model (model1) in front of a
    small Mul->Div model (model2) and verify execution equivalence plus
    preservation of node count, sparsity annotations and FINN datatypes."""
    # load pre model
    raw_m = get_data("finn", "data/onnx/mnist-conv/model.onnx")
    model1 = ModelWrapper(raw_m)
    # the input for model1 comes from a uint8 vector so we set the finn datatype
    # of the input tensor to DataType.UINT8 to verify that the datatypes are correctly
    # preserved in the transformed model
    model1.set_tensor_datatype(model1.graph.input[0].name, DataType.UINT8)
    model1 = model1.transform(InferShapes())
    model1 = model1.transform(GiveUniqueNodeNames())
    model1 = model1.transform(GiveReadableTensorNames())
    # set up post model
    shape = [1, 10]
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape)
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, [])
    a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, [])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape)
    mul_node = helper.make_node("Mul", ["inp", "a0"], ["mul_out"])
    div_node = helper.make_node("Div", ["mul_out", "a1"], ["outp"])
    graph = helper.make_graph(
        nodes=[mul_node, div_node],
        name="model2-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0, a1],
    )
    model2 = helper.make_model(graph, producer_name="model2")
    model2 = ModelWrapper(model2)
    # initialize model2
    a0_value = np.random.uniform(low=0, high=1, size=(1)).astype(np.float32)
    model2.set_initializer("a0", a0_value)
    a1_value = np.random.uniform(low=0.1, high=1, size=(1)).astype(np.float32)
    model2.set_initializer("a1", a1_value)
    # set a dummy sparsity annotation to check if it gets correctly transferred
    # to the merged model
    sparsity = {"dw": {"kernel_shape": 0}}
    model2.set_tensor_sparsity("a1", sparsity)
    model2 = model2.transform(InferShapes())
    model2 = model2.transform(InferDataTypes())
    model2 = model2.transform(InferDataLayouts())
    model2 = model2.transform(GiveUniqueNodeNames())
    model2 = model2.transform(GiveReadableTensorNames())
    # simulate the models before the merging and pass the output of model1 to model2
    # load one of the test vectors
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    inp_values = onnx.load_tensor_from_string(raw_i)
    inp_values = np_helper.to_array(inp_values)
    idict = {model1.graph.input[0].name: inp_values}
    odict = oxe.execute_onnx(model1, idict)
    temp = odict[model1.graph.output[0].name]
    idict = {model2.graph.input[0].name: temp}
    odict = oxe.execute_onnx(model2, idict)
    outp = odict[model2.graph.output[0].name]
    # merge models
    model_transformed = model2.transform(MergeONNXModels(model1))
    # merged model must reproduce the chained model1 -> model2 output
    idict = {model_transformed.graph.input[0].name: inp_values}
    odict = oxe.execute_onnx(model_transformed, idict)
    outp_transformed = odict[model_transformed.graph.output[0].name]
    assert (outp == outp_transformed).all()
    assert len(model_transformed.graph.node) == len(model1.graph.node) + len(
        model2.graph.node
    )
    # to test if the value is preserved we set the sparsity annotation of input[1]
    # of the division block to a dummy value, we can now look for the division block
    # and check if the sparsity annotation is still the same
    for n in model_transformed.graph.node:
        if n.op_type == "Div":
            tensor_name = n.input[1]
            set_sparsity = model_transformed.get_tensor_sparsity(tensor_name)
            assert sparsity == set_sparsity
    # check if finn datatype of graph.input[0] is still set to UINT8
    assert model_transformed.get_tensor_datatype("global_in") == DataType.UINT8
def test_infer_data_layouts_cnv():
    """Run InferDataLayouts on the exported CNV-w1a1 model at three stages
    (streamlined, lowered, converted to HLS layers) and check the annotated
    layout of selected tensors at each stage."""
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # streamlined model: conv section NCHW, fully-connected tail NC
    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # lowered model: interior of the lowered convs becomes NHWC
    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # HLS model: layouts survive conversion to HLS custom ops
    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert (model.get_tensor_layout("ConvolutionInputGenerator_0_out0") ==
            DataLayout.NHWC)
    assert model.get_tensor_layout(
        "StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout(
        "StreamingFCLayer_Batch_6_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC
    os.remove(export_onnx_path_cnv)
def test_move_transpose_past_scalar_mul(perm, scalar, data_layout):
    """Build a Transpose->Mul graph and verify that MoveTransposePastScalarMul
    swaps the two nodes exactly when the Mul is scalar and a data layout
    annotation is available; otherwise the graph must stay unchanged."""
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 2, 3, 4])
    # to determine out_size we need to calculate with "perm" for this test case
    dummy_in = np.random.uniform(low=0, high=1, size=(1, 2, 3, 4)).astype(np.float32)
    out_size = dummy_in.transpose(tuple(perm)).shape
    # scalar Mul gets an empty (rank-0) parameter shape
    a0_size = [] if scalar is True else out_size
    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, a0_size)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_size)
    transp_node = helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm)
    mul_node = helper.make_node("Mul", ["transp_out", "a0"], ["outp"])
    graph = helper.make_graph(
        nodes=[transp_node, mul_node],
        name="mv-transpose-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=[a0],
    )
    model = ModelWrapper(helper.make_model(graph, producer_name="mv_transpose_model"))
    # initialize the Mul parameter
    a0_values = np.random.uniform(low=0, high=1, size=tuple(a0_size)).astype(np.float32)
    model.set_initializer("a0", a0_values)
    if data_layout is not None:
        model.set_tensor_layout("inp", data_layout)
        model = model.transform(InferDataLayouts())
    for tfm in (
        InferShapes(),
        InferDataTypes(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
    ):
        model = model.transform(tfm)
    # compare execution before and after transformation
    inp_values = np.random.uniform(low=0, high=1, size=(1, 2, 3, 4)).astype(np.float32)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveTransposePastScalarMul())
    assert oxe.compare_execution(model, model_transformed, idict)
    # check whether the node order changed
    if scalar is True and data_layout is not None:
        # scalar Mul with known layout: the nodes must have been swapped
        assert model_transformed.graph.node[0] != model.graph.node[0]
        assert model_transformed.graph.node[1] != model.graph.node[1]
        assert model_transformed.graph.node[0].op_type == "Mul"
        assert model_transformed.graph.node[1].op_type == "Transpose"
        mul_input = model_transformed.graph.node[0].input[0]
        mul_output = model_transformed.graph.node[0].output[0]
        assert model_transformed.get_tensor_layout(mul_input) == data_layout
        assert model_transformed.get_tensor_layout(mul_output) == data_layout
    else:
        # no rewrite expected: node order is unchanged
        assert model_transformed.graph.node[0] == model.graph.node[0]
        assert model_transformed.graph.node[1] == model.graph.node[1]
        if data_layout is not None:
            # Mul still sits after the Transpose, so its tensors carry the
            # transposed layout, not the annotated input layout
            mul_input = model_transformed.graph.node[1].input[0]
            mul_output = model_transformed.graph.node[1].output[0]
            assert model_transformed.get_tensor_layout(mul_input) != data_layout
            assert model_transformed.get_tensor_layout(mul_output) != data_layout
def test_brevitas_compare_exported_mobilenet():
    """Compare the top-5 predictions of the FINN-exported 4-bit MobileNet-v1
    (with merged preprocessing) against the original Brevitas network on a
    small slice of the ImageNet validation set; per-image results are logged
    to a CSV file in the build directory.
    """
    # validation images come from the filesystem; skip without the env var
    if "IMAGENET_VAL_PATH" not in os.environ.keys():
        pytest.skip("Can't do validation without IMAGENET_VAL_PATH")
    n_images = 10
    # set True to register Brevitas debug hooks and dump per-tensor diffs
    debug_mode = False
    export_onnx_path = make_build_dir("test_brevitas_mobilenet-v1_")
    # export preprocessing
    preproc_onnx = export_onnx_path + "/quant_mobilenet_v1_4b_preproc.onnx"
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    # export the actual MobileNet-v1
    finn_onnx = export_onnx_path + "/quant_mobilenet_v1_4b.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    if debug_mode:
        dbg_hook = bo.enable_debug(mobilenet)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)
    model = ModelWrapper(finn_onnx)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(RemoveStaticGraphInputs())
    model = model.transform(InsertTopK())
    # get initializer from Mul that will be absorbed into topk
    # (kept so raw probabilities can be reconstructed from TopK output below)
    a0 = model.get_initializer(model.get_nodes_by_op_type("Mul")[-1].input[1])
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b_wo_preproc.onnx")
    # create merged preprocessing + MobileNet-v1 model
    model = model.transform(MergeONNXModels(preproc_model))
    model.save(export_onnx_path + "/quant_mobilenet_v1_4b.onnx")
    with open(
        export_onnx_path + "/mobilenet_validation.csv", "w", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [
                "goldenID",
                "brevitasTop5",
                "brevitasTop5[%]",
                "finnTop5",
                "finnTop5[%]",
                "top5equal",
                "top5%equal",
            ]
        )
        csvfile.flush()
        workload = imagenet_util.get_val_images(n_images, interleave_classes=True)
        all_inds_ok = True
        all_probs_ok = True
        for (img_path, target_id) in workload:
            img_np = imagenet_util.load_resize_crop(img_path)
            img_torch = torch.from_numpy(img_np).float()
            # do forward pass in PyTorch/Brevitas
            input_tensor = preproc.forward(img_torch)
            expected = mobilenet.forward(input_tensor).detach().numpy()
            expected_topk = expected.flatten()
            # descending top-5 class indices from the Brevitas reference
            expected_top5 = np.argsort(expected_topk)[-5:]
            expected_top5 = np.flip(expected_top5)
            expected_top5_prob = []
            for index in expected_top5:
                expected_top5_prob.append(expected_topk[index])
            idict = {model.graph.input[0].name: img_np}
            odict = oxe.execute_onnx(model, idict, return_full_exec_context=True)
            produced = odict[model.graph.output[0].name]
            # undo the Mul that was absorbed into TopK to recover raw scores
            produced_prob = odict["TopK_0_out0"] * a0
            inds_ok = (produced.flatten() == expected_top5).all()
            probs_ok = np.isclose(produced_prob.flatten(), expected_top5_prob).all()
            all_inds_ok = all_inds_ok and inds_ok
            all_probs_ok = all_probs_ok and probs_ok
            writer.writerow(
                [
                    str(target_id),
                    str(expected_top5),
                    str(expected_top5_prob),
                    str(produced.flatten()),
                    str(produced_prob.flatten()),
                    str(inds_ok),
                    str(probs_ok),
                ]
            )
            csvfile.flush()
            if ((not inds_ok) or (not probs_ok)) and debug_mode:
                print("Results differ for %s" % img_path)
                # check all tensors at debug markers
                names_brevitas = set(dbg_hook.values.keys())
                names_finn = set(odict.keys())
                names_common = names_brevitas.intersection(names_finn)
                for dbg_name in names_common:
                    if not np.isclose(
                        dbg_hook.values[dbg_name].detach().numpy(),
                        odict[dbg_name],
                        atol=1e-3,
                    ).all():
                        print("Tensor %s differs between Brevitas and FINN" % dbg_name)
        assert all_inds_ok and all_probs_ok
def apply(self, model):
    """Merge self.pre_model into the given model: the first dynamic output
    of the pre model is connected to the first dynamic input of the (post)
    model, and the fused graph is returned as a new, tidied-up model.

    Returns:
        (new_model, graph_modified) tuple; graph_modified is always False
        since the merge completes in a single pass.

    Raises:
        AssertionError: if either model has no dynamic tensor to connect,
            or if the shapes of the tensors to be connected do not match.
    """
    graph_modified = False
    pre_model = self.pre_model
    post_model = copy.deepcopy(model)
    # to avoid mix-ups, start by giving all tensors random names
    pre_model = pre_model.transform(GiveRandomTensorNames())
    post_model = post_model.transform(GiveRandomTensorNames())
    # check for dynamic outputs of pre model (no initializer attached)
    dyn_outp = [
        outp
        for outp in pre_model.graph.output
        if pre_model.get_initializer(outp.name) is None
    ]
    # fail early with a clear message instead of an IndexError further down
    assert len(dyn_outp) > 0, "The pre model has no dynamic output to connect."
    if len(dyn_outp) != 1:
        warnings.warn(
            "The pre model has more than one dynamic output! The transformation "
            "tries to connect the first dynamic output to the first dynamic input "
            "of the post model.")
    # check for dynamic inputs of post model (no initializer attached)
    dyn_inp = [
        inp
        for inp in post_model.graph.input
        if post_model.get_initializer(inp.name) is None
    ]
    assert len(dyn_inp) > 0, "The post model has no dynamic input to connect."
    if len(dyn_inp) != 1:
        warnings.warn(
            "The post model has more than one dynamic input! The transformation "
            "tries to connect the first dynamic input to the first dynamic output "
            "of the pre model.")
    # erase all node names to avoid conflict
    for n in pre_model.graph.node:
        n.name = ""
    for n in post_model.graph.node:
        n.name = ""
    # check if models can be merged
    output_model_a = dyn_outp[0].name
    input_model_b = dyn_inp[0].name
    output_a_shape = pre_model.get_tensor_shape(output_model_a)
    input_b_shape = post_model.get_tensor_shape(input_model_b)
    assert (
        output_a_shape == input_b_shape
    ), "Models can't be merged! Shapes don't match."
    # connect output of one model to input of the other
    for n in pre_model.graph.node:
        if output_model_a == n.output[0]:
            n.output[0] = input_model_b
    # extract information for new model
    # nodes: pre model first, then post model
    node_new = list(pre_model.graph.node) + list(post_model.graph.node)
    # overall input comes from the pre model, output from the post model
    inp = pre_model.graph.input[0]
    outp = post_model.graph.output[0]
    vi_pre = list(pre_model.graph.value_info)
    # pre model outputs become internal tensors of the merged graph
    out_pre = list(pre_model.graph.output)
    qa_pre = list(pre_model.graph.quantization_annotation)
    init_pre = list(pre_model.graph.initializer)
    vi_post = list(post_model.graph.value_info)
    qa_post = list(post_model.graph.quantization_annotation)
    init_post = list(post_model.graph.initializer)
    vi_new = vi_pre + vi_post + out_pre
    qa_new = qa_pre + qa_post
    init_new = init_pre + init_post
    # create new graph and model
    new_graph = helper.make_graph(
        nodes=node_new,
        name="fuse-graph",
        inputs=[inp],
        outputs=[outp],
        value_info=vi_new,
    )
    new_model = helper.make_model(new_graph, producer_name="fuse_model")
    new_model = ModelWrapper(new_model)
    # carry over initializers and quantization annotations
    for i in init_new:
        new_model.graph.initializer.append(i)
    for qa in qa_new:
        new_model.graph.quantization_annotation.append(qa)
    # tidy-up new model
    model = new_model
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveUniqueParameterTensors())
    model = model.transform(GiveReadableTensorNames())
    return (model, graph_modified)
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    """End-to-end check for a synthetic model: verify the streamlined
    reference output, convert to HLS custom ops, check the resulting
    topology, and validate the top-k result via cppsim execution."""
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)
    x = gen_finn_dt_tensor(idt, input_tensor_shape)
    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    # NOTE: previously the channel-wise multiplier was fetched from the last
    # node's initializer and then immediately overwritten (dead store); the
    # reference value intentionally uses a multiplier of 1.
    chw_mul = 1
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()
    model = model.transform(InferDataLayouts())
    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it
    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    # second pass: transpose absorption may expose another label-select
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())
    model = model.transform(SortGraph())
    # model.save("golden_hls.onnx")
    # check topology status
    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1
    # run the HLS model in cppsim and verify the top-k result
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)
    os.remove(export_onnx_path)
def test_absorb_transp_into_flatten(perm, shape, ishape, data_layout):
    """Build a Transpose->Flatten (or Transpose->Reshape) graph and check
    AbsorbTransposeIntoFlatten: the Transpose is absorbed only for parameter
    combinations where doing so is mathematically safe.
    """
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    transp_node = helper.make_node("Transpose", ["inp"], ["transp_out"], perm=perm)
    # dummy tensor used only to derive the output shape for the given perm/shape
    dummy_in = np.random.uniform(low=0, high=1, size=tuple(ishape)).astype(np.float32)
    if shape is None:
        # flatten variant: output shape follows from transposed dummy tensor
        shape_node = helper.make_node("Flatten", ["transp_out"], ["outp"])
        dummy_in = dummy_in.transpose(tuple(perm))
        oshape = dummy_in.reshape(dummy_in.shape[0], -1).shape
        outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
        shape0 = None
    else:
        # reshape variant: target shape is given explicitly via "shape0"
        shape0 = helper.make_tensor_value_info("shape0", TensorProto.FLOAT, shape)
        shape_node = helper.make_node("Reshape", ["transp_out", "shape0"], ["outp"])
        oshape = dummy_in.transpose(tuple(perm)).reshape(tuple(shape)).shape
        outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
    graph = helper.make_graph(
        nodes=[transp_node, shape_node],
        name="absorb-transpose-graph",
        inputs=[inp],
        outputs=[outp],
    )
    model = helper.make_model(graph, producer_name="absorb_transpose_model")
    model = ModelWrapper(model)
    if shape is not None:
        model.graph.value_info.append(shape0)
        model.set_initializer("shape0", np.asarray(shape))
    if data_layout == "NCHW":
        model.set_tensor_layout("inp", DataLayout.NCHW)
    else:
        model.set_tensor_layout("inp", DataLayout.NHWC)
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # model.save("test.onnx")
    model_transformed = model.transform(AbsorbTransposeIntoFlatten())
    # model_transformed.save("test2.onnx")
    # verify transformation
    inp_values = np.random.uniform(low=-1, high=1, size=tuple(ishape)).astype(
        np.float32
    )
    idict = {model.graph.input[0].name: inp_values}
    assert oxe.compare_execution(model, model_transformed, idict)
    # only some of the parameter combinations lead to a graph that will be changed when
    # AbsorbTransposeIntoFlatten is applied
    if shape == [-1, 1]:  # not a flatten operation, so the graph will not be changed
        assert model.graph == model_transformed.graph
    elif perm == [
        3,
        2,
        0,
        1,
    ]:  # the first dimension is also part of the transpose operation
        # so the graph will not be changed
        assert model.graph == model_transformed.graph
    # the following cases are the ones in which the model is transformed
    # because we tested the parameters shape and perm before we can only consider ishape
    # and data_layout (the transformed model should only contain a "Flatten" node)
    elif ishape == [1, 1, 1, 4] and data_layout == "NHWC":
        assert model_transformed.graph.node[0].op_type == "Flatten"
    elif ishape == [2, 4, 1, 1] and data_layout == "NCHW" and shape is None:
        # If the first dimension of the input tensor is not 1, flatten and
        # reshape (with shape = [1, -1]) would lead to different results
        assert model_transformed.graph.node[0].op_type == "Flatten"
    # all other cases lead to an unchanged model
    else:
        assert model.graph == model_transformed.graph
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
    """Build a Conv -> MultiThreshold -> Flatten/Reshape -> MatMul ->
    MultiThreshold model, streamline it, convert it to HLS layers and check
    via cppsim that execution matches and the conv-to-FC transition
    (Transpose->Flatten) was removed by RemoveCNVtoFCFlatten.
    """
    np.random.seed(0)
    idt = DataType["UINT4"]
    odt = DataType["UINT4"]
    conv_weight_dt = DataType["INT4"]
    fc_weight_dt = DataType["INT4"]

    # unpack the parametrized conv configuration
    input_shape, kernel_shape, stride, pad = conv_config
    kernel_size_h, kernel_size_w = kernel_shape
    input_size_h, input_size_w = input_shape
    stride_h, stride_w = stride
    pad_h, pad_w = pad

    in_chn = 4
    fc_filters = 16

    if depthwise is True:
        # depthwise: one filter per input channel, group == channels
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w]
    else:
        group = 1
        out_chn = 8
        conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w]

    output_size_h = compute_conv_output_dim(input_size_h, kernel_size_h, stride_h, 2 * pad_h)
    output_size_w = compute_conv_output_dim(input_size_w, kernel_size_w, stride_w, 2 * pad_w)

    # note: input_shape is rebound here from (h, w) to the full NCHW shape
    input_shape = [1, in_chn, input_size_h, input_size_w]
    fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters]
    output_shape = [1, fc_filters]

    # note: conv_config is rebound here from the test parameter tuple to the
    # ONNX Conv node attribute dict
    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w]
    conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w]
    conv_config["strides"] = [stride_h, stride_w]

    global_in = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, input_shape)
    global_out = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("conv_param", TensorProto.FLOAT, conv_param_shape),
        helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)),
        helper.make_tensor_value_info("matmul_param", TensorProto.FLOAT, fc_param_shape),
        helper.make_tensor_value_info("thres2_param", TensorProto.FLOAT, (fc_filters, 15)),
        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
    ]

    # the conv->fc transition uses either Reshape or Flatten, both must work
    if use_reshape:
        flatten_node = helper.make_node("Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"])
    else:
        flatten_node = helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1)

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[global_in],
            outputs=[global_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config),
                helper.make_node(
                    "MultiThreshold",
                    ["conv_out", "thres1_param"],
                    ["thres1_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
                flatten_node,
                helper.make_node("MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]),
                helper.make_node(
                    "MultiThreshold",
                    ["matmul_out", "thres2_param"],
                    ["global_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
            ],
        ))

    model = ModelWrapper(modelproto)
    # annotate FINN datatypes and the input layout
    model.set_tensor_datatype("global_in", idt)
    model.set_tensor_layout("global_in", DataLayout.NCHW)
    model.set_tensor_datatype("global_out", odt)
    model.set_tensor_datatype("conv_param", conv_weight_dt)
    model.set_tensor_datatype("matmul_param", fc_weight_dt)
    model.set_tensor_datatype("thres1_param", DataType["INT32"])
    model.set_tensor_datatype("thres2_param", DataType["INT32"])

    model.set_initializer("conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))
    model.set_initializer("thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0))
    model.set_initializer(
        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0))
    model.set_initializer("matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape))
    model.set_initializer("reshape_shape", np.array([1, -1]))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())

    # streamlining
    new_model = model.transform(MoveScalarLinearPastInvariants())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(InferDataLayouts())
    new_model = new_model.transform(RemoveUnusedTensors())

    # convert_to_hls
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
    new_model = new_model.transform(to_hls.InferThresholdingLayer())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    new_model = new_model.transform(to_hls.InferStreamingMaxPool())
    new_model = new_model.transform(RemoveCNVtoFCFlatten())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferDataLayouts())

    # prepare cppsim
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    new_model = new_model.transform(SetExecMode("cppsim"))

    # check for correct execution
    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))

    # check if transpose->flatten was removed
    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    """Convert the streamlined W1A1 CNV network to HLS layers (with either
    fused or standalone activations), check the resulting node topology, and
    verify cppsim execution against the streamlined reference output.
    """
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    # scale raw uint8 pixel values into [0, 1]
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    # configure folding (PE/SIMD) of the generated FC layers
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            # pick a PE that divides MH evenly, else fall back to full MH
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            # pick a SIMD that divides MW evenly, else fall back to full MW
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        # standalone activations add one Thresholding_Batch per MultiThreshold
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    # the chosen test vector belongs to class 3
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
def apply(self, model):
    """Move a Flatten node past a following MatMul/Mul/Add with a constant
    parameter, so the affine op happens before flattening. Only applies when
    the Flatten input has NHWC data layout with H=W=1 (so the reordering is
    shape-safe) and neither node is a fork/join node.

    Returns (model, graph_modified); when modified, the model is re-run
    through shape/datatype/layout inference before returning.
    """
    graph = model.graph
    graph_modified = False
    node_ind = 0
    for n in graph.node:
        node_ind += 1
        if (n.op_type == "Flatten" and not model.is_fork_node(n)
                and not model.is_join_node(n)):
            consumer = model.find_consumer(n.output[0])
            if (consumer is not None
                    and (consumer.op_type == "MatMul"
                         or consumer.op_type == "Mul"
                         or consumer.op_type == "Add")
                    and not model.is_join_node(consumer)):
                # move flatten past operation and rewire tensors
                start_name = n.input[0]
                # check if data layout is set to NHWC and H=W=1
                datalayout = model.get_tensor_layout(start_name)
                if datalayout == DataLayout.NHWC:
                    (b, h, w, c) = model.get_tensor_shape(start_name)
                    if h != 1 or w != 1:
                        warnings.warn(
                            """The Transformation can only be performed if H=W=1.""")
                        continue
                else:
                    warnings.warn(
                        """The Transformation can only be performed on operations that operate on data layout NHWC.""")
                    continue
                middle_name = n.output[0]
                end_name = consumer.output[0]
                op_param_name = consumer.input[1]
                # the affine op's second input must be a constant initializer
                A = model.get_initializer(op_param_name)
                if A is None:
                    warnings.warn("Param is not constant, skipping")
                    continue
                op_in_dt = model.get_tensor_datatype(consumer.input[0])
                op_out_dt = model.get_tensor_datatype(consumer.output[0])
                start_shape = model.get_tensor_shape(start_name)
                # run the op on a dummy tensor to determine the new
                # intermediate tensor shape after the reordering
                dummy_in = np.random.uniform(low=0, high=1, size=(start_shape))
                if consumer.op_type == "MatMul":
                    dummy_out = np.matmul(dummy_in, A)
                elif consumer.op_type == "Mul":
                    dummy_out = dummy_in * A
                elif consumer.op_type == "Add":
                    dummy_out = dummy_in + A
                # rebuild the pair in swapped order: op first, then Flatten
                new_op = oh.make_node(
                    consumer.op_type,
                    [start_name, op_param_name],
                    [middle_name],
                    name=consumer.name,
                )
                new_flatten = oh.make_node("Flatten", [middle_name], [end_name])
                graph.node.insert(node_ind, new_op)
                graph.node.insert(node_ind + 1, new_flatten)
                model.set_tensor_shape(middle_name, dummy_out.shape)
                # because a flatten node doesn't change the datatype we need
                # only the datatype of the op node
                model.set_tensor_datatype(start_name, op_in_dt)
                model.set_tensor_datatype(middle_name, op_out_dt)
                model.set_tensor_datatype(end_name, op_out_dt)
                # set datalayout
                model.set_tensor_layout(start_name, DataLayout.NHWC)
                model.set_tensor_layout(middle_name, DataLayout.NHWC)
                # remove old nodes
                graph.node.remove(n)
                graph.node.remove(consumer)
                graph_modified = True
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    return (model, graph_modified)