Пример #1
0
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run streamlining on given model. Streamlining involves moving floating point
    scale/shift parameters around, collapsing adjacent ones into a single parameter,
    then absorbing the scale/shift into the following `MultiThreshold` node.
    Streamlining requires careful topology design and cannot be applied to all
    topologies.
    """

    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
    model = model.transform(Streamline())
    need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0
    if need_lowering:
        model = model.transform(LowerConvsToMatMul())
        model = model.transform(MakeMaxPoolNHWC())
        model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
        model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    # absorb final add-mul nodes into TopK
    model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataLayouts())
    model = model.transform(RemoveUnusedTensors())

    if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps(
    ):
        verify_step(model, cfg, "streamlined_python", need_parent=False)

    return model
Пример #2
0
def test_end2end_cnv_w1a1_streamline():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model.save(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
Пример #3
0
 def test_streamline(self, topology, wbits, abits):
     prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
     model = load_test_checkpoint_or_skip(prev_chkpt_name)
     # move past any reshapes to be able to streamline input scaling
     model = model.transform(MoveScalarLinearPastInvariants())
     model = model.transform(Streamline())
     if "fc" not in topology:
         model = model.transform(LowerConvsToMatMul())
         model = model.transform(MakeMaxPoolNHWC())
         model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(Streamline())
     # absorb final add-mul nodes into TopK
     model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
     model = model.transform(InferDataLayouts())
     model = model.transform(RemoveUnusedTensors())
     model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data",
                              "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]

    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
def test_infer_data_layouts_cnv():
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())

    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
    # note: im2col output isn't really NHWC or any other common layout
    # since the concept of channels changes with lowering... but it is
    # conceptually close to NHWC since the innermost dim gets multiplied
    assert (model.get_tensor_layout("ConvolutionInputGenerator_0_out0") ==
            DataLayout.NHWC)
    assert model.get_tensor_layout(
        "StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC
    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
    assert model.get_tensor_layout(
        "StreamingFCLayer_Batch_6_out0") == DataLayout.NC
    assert model.get_tensor_layout("global_out") == DataLayout.NC

    os.remove(export_onnx_path_cnv)