def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): """Run streamlining on given model. Streamlining involves moving floating point scale/shift parameters around, collapsing adjacent ones into a single parameter, then absorbing the scale/shift into the following `MultiThreshold` node. Streamlining requires careful topology design and cannot be applied to all topologies. """ model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold()) model = model.transform(Streamline()) need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0 if need_lowering: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps( ): verify_step(model, cfg, "streamlined_python", need_parent=False) return model
def step_mobilenet_lower_convs(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(LowerConvsToMatMul()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) model = model.transform(RoundAndClipThresholds()) model = model.transform(InferDataLayouts()) return model
def test_end2end_cnv_w1a1_streamline(): model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx") model = model.transform(Streamline()) model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
def test_end2end_mobilenet_lowering(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_streamlined.onnx") model = model.transform(LowerConvsToMatMul()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) model = model.transform(RoundAndClipThresholds()) model.save(build_dir + "/end2end_mobilenet_lowered.onnx")
def test_streamline(self, topology, wbits, abits): prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post") model = load_test_checkpoint_or_skip(prev_chkpt_name) # move past any reshapes to be able to streamline input scaling model = model.transform(MoveScalarLinearPastInvariants()) model = model.transform(Streamline()) if "fc" not in topology: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
def test_convert_to_hls_layers_cnv_w1a1(fused_activation): cnv = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv) model = ModelWrapper(export_onnx_path_cnv) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model = model.transform(InferDataLayouts()) # model.save("golden.onnx") # load one of the test vectors fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz") input_tensor = np.load(fn)["arr_0"].astype(np.float32) input_tensor = input_tensor / 255 assert input_tensor.shape == (1, 3, 32, 32) # generate expected value from streamlined net input_dict = {"global_in": input_tensor} expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] # if we infer thresholding first, all MultiThresholds get converted to HLS # subsequently, the FC inference will generate passthrough MVAUs if not fused_activation: model = model.transform(to_hls.InferThresholdingLayer()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) for node in model.graph.node: if node.op_type == "StreamingFCLayer_Batch": inst = getCustomOp(node) inst.set_nodeattr("mem_mode", "decoupled") mw = inst.get_nodeattr("MW") mh = inst.get_nodeattr("MH") if mh % 4 == 0: pe = mh // 4 else: pe = mh inst.set_nodeattr("PE", pe) if mw % 16 == 0: simd = mw // 16 else: simd = mw inst.set_nodeattr("SIMD", simd) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) # check topology status finn_nodes = model.get_finn_nodes() if fused_activation: assert len(finn_nodes) == 18 else: assert len(finn_nodes) == 26 thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch") assert len(thr_nodes) == 8 non_finn_nodes = model.get_non_finn_nodes() assert len(non_finn_nodes) == 4 exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"] assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch") assert len(fc_nodes) == 9 swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator") assert len(swg_nodes) == 6 mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch") assert len(mp_nodes) == 2 # model.save("cnv-pre-compile.onnx") model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) # model.save("cnv-post-compile.onnx") produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() assert np.argmax(produced) == 3 os.remove(export_onnx_path_cnv)
def test_infer_data_layouts_cnv(): cnv = get_test_model_trained("CNV", 1, 1) bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv) model = ModelWrapper(export_onnx_path_cnv) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Streamline()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC # note: im2col output isn't really NHWC or any other common layout # since the concept of channels changes with lowering... but it is # conceptually close to NHWC since the innermost dim gets multiplied assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) model = model.transform(to_hls.InferBinaryStreamingFCLayer()) model = model.transform(to_hls.InferQuantizedStreamingFCLayer()) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataLayouts()) assert model.get_tensor_layout("global_in") == DataLayout.NCHW assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC # note: im2col output isn't really NHWC or any other common layout # since the concept of channels changes with lowering... but it is # conceptually close to NHWC since the innermost dim gets multiplied assert (model.get_tensor_layout("ConvolutionInputGenerator_0_out0") == DataLayout.NHWC) assert model.get_tensor_layout( "StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC assert model.get_tensor_layout( "StreamingFCLayer_Batch_6_out0") == DataLayout.NC assert model.get_tensor_layout("global_out") == DataLayout.NC os.remove(export_onnx_path_cnv)
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape): np.random.seed(0) idt = DataType["UINT4"] odt = DataType["UINT4"] conv_weight_dt = DataType["INT4"] fc_weight_dt = DataType["INT4"] input_shape, kernel_shape, stride, pad = conv_config kernel_size_h, kernel_size_w = kernel_shape input_size_h, input_size_w = input_shape stride_h, stride_w = stride pad_h, pad_w = pad in_chn = 4 fc_filters = 16 if depthwise is True: group = out_chn = in_chn conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w] else: group = 1 out_chn = 8 conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w] output_size_h = compute_conv_output_dim(input_size_h, kernel_size_h, stride_h, 2 * pad_h) output_size_w = compute_conv_output_dim(input_size_w, kernel_size_w, stride_w, 2 * pad_w) input_shape = [1, in_chn, input_size_h, input_size_w] fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters] output_shape = [1, fc_filters] conv_config = {} conv_config["dilations"] = [1, 1] conv_config["group"] = group conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w] conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w] conv_config["strides"] = [stride_h, stride_w] global_in = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, input_shape) global_out = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, output_shape) value_info = [ helper.make_tensor_value_info("conv_param", TensorProto.FLOAT, conv_param_shape), helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)), helper.make_tensor_value_info("matmul_param", TensorProto.FLOAT, fc_param_shape), helper.make_tensor_value_info("thres2_param", TensorProto.FLOAT, (fc_filters, 15)), helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []), ] if use_reshape: flatten_node = helper.make_node("Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"]) else: flatten_node = helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1) modelproto = helper.make_model( helper.make_graph( name="test", inputs=[global_in], outputs=[global_out], value_info=value_info, nodes=[ helper.make_node("Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config), helper.make_node( "MultiThreshold", ["conv_out", "thres1_param"], ["thres1_out"], domain="finn.custom_op.general", out_dtype="UINT4", ), flatten_node, helper.make_node("MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]), helper.make_node( "MultiThreshold", ["matmul_out", "thres2_param"], ["global_out"], domain="finn.custom_op.general", out_dtype="UINT4", ), ], )) model = ModelWrapper(modelproto) model.set_tensor_datatype("global_in", idt) model.set_tensor_layout("global_in", DataLayout.NCHW) model.set_tensor_datatype("global_out", odt) model.set_tensor_datatype("conv_param", conv_weight_dt) model.set_tensor_datatype("matmul_param", fc_weight_dt) model.set_tensor_datatype("thres1_param", DataType["INT32"]) model.set_tensor_datatype("thres2_param", DataType["INT32"]) model.set_initializer("conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)) model.set_initializer("thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)) model.set_initializer( "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)) model.set_initializer("matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)) model.set_initializer("reshape_shape", np.array([1, -1])) model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) model = model.transform(InferDataLayouts()) # streamlining new_model = model.transform(MoveScalarLinearPastInvariants()) new_model = new_model.transform(Streamline()) new_model = new_model.transform(LowerConvsToMatMul()) new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) new_model = new_model.transform(Streamline()) new_model = new_model.transform(InferDataLayouts()) new_model = new_model.transform(RemoveUnusedTensors()) # convert_to_hls if depthwise is True: new_model = new_model.transform(to_hls.InferVVAU()) new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) new_model = new_model.transform(to_hls.InferThresholdingLayer()) new_model = new_model.transform(to_hls.InferConvInpGen()) new_model = new_model.transform(to_hls.InferStreamingMaxPool()) new_model = new_model.transform(RemoveCNVtoFCFlatten()) new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes()) new_model = new_model.transform(GiveUniqueNodeNames()) new_model = new_model.transform(InferDataLayouts()) # prepare cppsim new_model = new_model.transform(PrepareCppSim()) new_model = new_model.transform(CompileCppSim()) new_model = new_model.transform(SetExecMode("cppsim")) # check for correct execution x = gen_finn_dt_tensor(idt, input_shape) inp_dict = {model.graph.input[0].name: x} assert oxe.compare_execution(model, new_model, inp_dict) num_transpose = len(new_model.get_nodes_by_op_type("Transpose")) num_flatten = len(new_model.get_nodes_by_op_type("Flatten")) num_reshape = len(new_model.get_nodes_by_op_type("Reshape")) # check if transpose->flatten was removed assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0