Exemplo n.º 1
0
def test_streamline_fc(size, wbits, abits):
    """Verify that streamlining a trained FC network leaves its output intact.

    The trained network is exported to ONNX, tidied up and executed once to
    obtain a reference result. The same model is then streamlined, its graph
    bookkeeping is compared against the expected counts, and it is executed
    again on the same input. Both results must agree within ``atol=1e-3``.
    """
    # configurations for which no trained model exists are skipped
    if (size, wbits, abits) == ("LFC", 2, 2):
        pytest.skip("No LFC-w2a2 present at the moment")
    if abits < wbits:
        pytest.skip("No wbits > abits cases at the moment")

    network_name = "%s_%dW%dA" % (size, wbits, abits)
    onnx_path = export_onnx_path + "/%s.onnx" % network_name
    trained_net = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(trained_net, (1, 1, 28, 28), onnx_path)

    model = ModelWrapper(onnx_path)
    tidy_up_passes = (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        RemoveStaticGraphInputs,
    )
    for make_pass in tidy_up_passes:
        model = model.transform(make_pass())

    # one of the packaged test vectors serves as the network input
    raw_input = get_data("finn",
                         "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    feed = {"global_in": nph.to_array(onnx.load_tensor_from_string(raw_input))}

    def run_and_fetch_output(net):
        # FINN-based execution; returns the value of the first graph output
        context = oxe.execute_onnx(net, feed, True)
        return context[net.graph.output[0].name]

    expected = run_and_fetch_output(model)

    for make_pass in (Streamline, RemoveUnusedTensors):
        model = model.transform(make_pass())
    graph = model.graph
    assert len(graph.initializer) == 11
    assert len(graph.value_info) == 21
    assert len(graph.quantization_annotation) == 20

    produced = run_and_fetch_output(model)
    outputs_match = np.isclose(expected, produced, atol=1e-3)
    assert outputs_match.all()
Exemplo n.º 2
0
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply the streamlining build step to ``model`` and return the result.

    Streamlining moves floating point scale/shift parameters around, collapses
    adjacent ones into a single parameter and finally absorbs the scale/shift
    into the following `MultiThreshold` node. It relies on careful topology
    design and is therefore not applicable to every topology.

    If the ``STREAMLINED_PYTHON`` verification step is among the steps resolved
    from ``cfg``, the streamlined model is verified before being returned.
    """
    for make_pass in (absorb.AbsorbSignBiasIntoMultiThreshold, Streamline):
        model = model.transform(make_pass())

    # convolutions that are still present must be lowered to matrix multiplies
    conv_nodes = model.get_nodes_by_op_type("Conv")
    if len(conv_nodes) != 0:
        lowering_passes = (
            LowerConvsToMatMul,
            MakeMaxPoolNHWC,
            absorb.AbsorbTransposeIntoMultiThreshold,
            MakeMaxPoolNHWC,
        )
        for make_pass in lowering_passes:
            model = model.transform(make_pass())

    closing_passes = (
        ConvertBipolarMatMulToXnorPopcount,
        Streamline,
        # folds the final add-mul nodes into TopK
        absorb.AbsorbScalarMulAddIntoTopK,
        InferDataLayouts,
        RemoveUnusedTensors,
    )
    for make_pass in closing_passes:
        model = model.transform(make_pass())

    requested_checks = cfg._resolve_verification_steps()
    if VerificationStepType.STREAMLINED_PYTHON in requested_checks:
        verify_step(model, cfg, "streamlined_python", need_parent=False)

    return model
Exemplo n.º 3
0
def test_streamline_cnv(size, wbits, abits):
    """Verify that streamlining a trained CNV network leaves its output intact.

    The exported network is executed before and after streamlining on the
    same CIFAR-10 test image. The outputs must agree within ``atol=1e-3``,
    the streamlined graph must have the expected number of initializers,
    value_info entries and nodes, start with a ``MultiThreshold`` node and
    yield an argmax of 3 for this image.
    """
    if abits < wbits:
        pytest.skip("No wbits > abits cases at the moment")

    network_name = "%s_%dW%dA" % (size, wbits, abits)
    onnx_path = export_onnx_path + "/%s.onnx" % network_name
    trained_net = get_test_model_trained(size, wbits, abits)
    bo.export_finn_onnx(trained_net, (1, 3, 32, 32), onnx_path)

    model = ModelWrapper(onnx_path)
    tidy_up_passes = (
        InferShapes,
        FoldConstants,
        GiveUniqueNodeNames,
        GiveReadableTensorNames,
        RemoveStaticGraphInputs,
    )
    for make_pass in tidy_up_passes:
        model = model.transform(make_pass())

    # one of the packaged test vectors, divided by 255 before use
    npz_path = pk.resource_filename(
        "finn", "data/cifar10/cifar10-test-data-class3.npz")
    image = np.load(npz_path)["arr_0"].astype(np.float32) / 255
    assert image.shape == (1, 3, 32, 32)
    feed = {"global_in": image}

    def run_and_fetch_output(net):
        # FINN-based execution; returns the value of the first graph output
        context = oxe.execute_onnx(net, feed, True)
        return context[net.graph.output[0].name]

    expected = run_and_fetch_output(model)

    # debugging aid: model.save("orig_cnv.onnx") here dumps the original graph
    for make_pass in (Streamline, RemoveUnusedTensors):
        model = model.transform(make_pass())
    graph = model.graph
    assert len(graph.initializer) == 21
    assert len(graph.value_info) == 43
    # debugging aid: model.save("streamlined_cnv.onnx") dumps the new graph
    assert len(graph.node) == 23

    produced = run_and_fetch_output(model)
    outputs_match = np.isclose(expected, produced, atol=1e-3)
    assert outputs_match.all()
    assert model.graph.node[0].op_type == "MultiThreshold"
    assert np.argmax(produced) == 3
Exemplo n.º 4
0
def test_end2end_mobilenet_create_dataflow_partition():
    """Partition the folded MobileNet checkpoint and save parent and child.

    The parent model is written right after partitioning. The child model
    referenced by the first ``StreamingDataflowPartition`` node is loaded,
    stripped of unused tensors and written to its own checkpoint file.
    """
    folded = load_test_checkpoint_or_skip(build_dir +
                                          "/end2end_mobilenet_folded.onnx")
    parent = folded.transform(CreateDataflowPartition())
    parent.save(build_dir + "/end2end_mobilenet_dataflow_parent.onnx")

    # the partition node stores the file name of the child model it wraps
    partition_nodes = parent.get_nodes_by_op_type("StreamingDataflowPartition")
    partition_op = getCustomOp(partition_nodes[0])
    child_path = partition_op.get_nodeattr("model")

    child = load_test_checkpoint_or_skip(child_path)
    child = child.transform(RemoveUnusedTensors())
    child.save(build_dir + "/end2end_mobilenet_dataflow_model.onnx")
Exemplo n.º 5
0
 def cleanup(self):
     """Return the model after running the cleanup transformations on it.

     The transformations are applied in a fixed order, each one with
     ``cleanup=False`` and ``make_deepcopy=False``.
     """
     # all transformations are created up front, then applied in order
     remove_unused = RemoveUnusedTensors()
     remove_static_inputs = RemoveStaticGraphInputs()
     sort_graph = SortGraph()
     transform_options = {"cleanup": False, "make_deepcopy": False}

     cleaned = self.transform(remove_unused, **transform_options)
     cleaned = cleaned.transform(remove_static_inputs, **transform_options)
     return cleaned.transform(sort_graph, **transform_options)
Exemplo n.º 6
0
    def apply(self, model):
        """Run the Vitis build flow on ``model``.

        The graph is prepared at the global level, floorplanned and split
        into dataflow partitions. Every partition is then built as its own
        kernel, the kernels are optionally linked (``self.enable_link``) and
        a driver is generated.

        Returns the tuple ``(model, False)``.
        """
        _check_vitis_envvars()
        # data layouts are needed by the steps that follow
        model = model.transform(InferDataLayouts())

        # global preparation; names are refreshed after every step
        for global_prep in (InsertIODMA(512), InsertDWC()):
            model = (model.transform(global_prep)
                     .transform(GiveUniqueNodeNames())
                     .transform(GiveReadableTensorNames()))

        model = model.transform(Floorplan(floorplan=self.floorplan_file))

        model = (model.transform(CreateDataflowPartition())
                 .transform(GiveUniqueNodeNames())
                 .transform(GiveReadableTensorNames()))

        # every dataflow partition becomes a separately built kernel
        for partition in model.get_nodes_by_op_type(
                "StreamingDataflowPartition"):
            partition_op = getCustomOp(partition)
            kernel_name = partition_op.onnx_node.name
            kernel_file = partition_op.get_nodeattr("model")

            kernel = ModelWrapper(kernel_file)
            kernel = (kernel.transform(InsertFIFO())
                      .transform(RemoveUnusedTensors())
                      .transform(GiveUniqueNodeNames()))
            kernel.save(kernel_file)

            kernel = kernel.transform(
                PrepareIP(self.fpga_part, self.period_ns))
            kernel = kernel.transform(HLSSynthIP())
            stitch = CreateStitchedIP(self.fpga_part, self.period_ns,
                                      kernel_name, True)
            kernel = kernel.transform(stitch)
            kernel = kernel.transform(CreateVitisXO(kernel_name))
            kernel.set_metadata_prop("platform", "alveo")
            kernel.save(kernel_file)

        # assemble the complete design out of the kernels
        if self.enable_link:
            link = VitisLink(
                self.platform,
                round(1000 / self.period_ns),
                strategy=self.strategy,
                enable_debug=self.enable_debug,
            )
            model = model.transform(link)

        # the platform attribute is needed for correct remote execution
        model.set_metadata_prop("platform", "alveo")

        # finally generate the driver
        model = model.transform(MakePYNQDriver(platform="alveo"))
        return (model, False)
Exemplo n.º 7
0
def inference_cost(model_filename,
                   *,
                   output_json=None,
                   output_onnx=None,
                   preprocess=True,
                   discount_sparsity=True):
    """Print the inference cost estimate metric for the given ONNX model.
    The Quant op for weight/activation quantization is supported.

    :param model_filename: Filename of the ONNX model
    :param output_json: Optional JSON filename the inference cost dict is
        saved to
    :param output_onnx: Optional ONNX filename the final model is saved to
        after any preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, discount the op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)

    if preprocess:
        for quant_node in model.get_nodes_by_op_type("Quant"):
            quant_node.domain = "finn.custom_op.general"
        preprocessing_passes = (
            InferShapes,
            GiveUniqueParameterTensors,
            InferDataTypes,
            FoldConstants,
            RemoveUnusedTensors,
            RemoveStaticGraphInputs,
            InferDataTypes,
        )
        for make_pass in preprocessing_passes:
            model = model.transform(make_pass())

    for make_pass in (GiveUniqueNodeNames, GiveReadableTensorNames):
        model = model.transform(make_pass())

    if output_onnx is not None:
        model.save(output_onnx)

    def cost_analysis(analysed_model):
        return infca.inference_cost(analysed_model, discount_sparsity)

    ret = model.analysis(cost_analysis)

    # all totals are computed before any of them is stored back into the dict
    totals = {
        "total_bops": compute_bops(ret),
        "total_mem_w_bits": compute_mem_bits(ret, "mem_w"),
        "total_mem_o_bits": compute_mem_bits(ret, "mem_o"),
    }
    for total_name, total_value in totals.items():
        ret[total_name] = total_value

    # the "unsupported" entry is stored as its string representation
    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))

    if output_json is None:
        return
    with open(output_json, "w") as json_file:
        json.dump(ret, json_file, sort_keys=True, indent=2)
Exemplo n.º 8
0
def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Streamline the ResNet-50 model and return the result.

    Four rounds are run. Each round applies the linear and the nonlinear
    streamlining step followed by a tidy-up of the graph. Afterwards the
    ``DoubleToSingleFloat`` transformation is applied once.
    """
    tidy_up_passes = (
        RemoveUnusedTensors,
        GiveReadableTensorNames,
        InferDataTypes,
        SortGraph,
    )

    for _ in range(4):
        linear_result = step_resnet50_streamline_linear(model, cfg)
        model = step_resnet50_streamline_nonlinear(linear_result, cfg)

        # tidy up at the end of every round
        for make_pass in tidy_up_passes:
            model = model.transform(make_pass())

    return model.transform(DoubleToSingleFloat())
Exemplo n.º 9
0
def streamline(model, binary=True):
    """Run the streamlining pipeline on ``model``, save and return it.

    Start and end of the pipeline are logged and the resulting model is
    saved under the name "3_streamlined". The ``binary`` argument is accepted
    but not used by this function.
    """
    log("Streamline transformations launched")

    pipeline = (
        MoveScalarLinearPastInvariants,
        Streamline,
        # add and mul are absorbed into the thresholds
        absorb.AbsorbAddIntoMultiThreshold,
        absorb.AbsorbMulIntoMultiThreshold,
        # add-mul is absorbed into top-k
        absorb.AbsorbScalarMulAddIntoTopK,
        RoundAndClipThresholds,
        # tidy-up
        InferDataLayouts,
        RemoveUnusedTensors,
    )
    for make_pass in pipeline:
        model = model.transform(make_pass())

    log("Streamline transformations completed")
    save(model, "3_streamlined")
    return model
Exemplo n.º 10
0
 def test_streamline(self, topology, wbits, abits):
     """Streamline the "pre_post" checkpoint and save it as "streamline".

     The test is skipped by the loader if the previous checkpoint is not
     available. Topologies whose name does not contain "fc" additionally
     get their convolutions lowered to matrix multiplies.
     """
     source_chkpt = get_checkpoint_name(topology, wbits, abits, "pre_post")
     model = load_test_checkpoint_or_skip(source_chkpt)

     # scalar linear ops are moved past any reshapes first so that the
     # input scaling can be streamlined
     passes = [MoveScalarLinearPastInvariants, Streamline]
     if "fc" not in topology:
         passes += [
             LowerConvsToMatMul,
             MakeMaxPoolNHWC,
             absorb.AbsorbTransposeIntoMultiThreshold,
         ]
     passes += [
         ConvertBipolarMatMulToXnorPopcount,
         Streamline,
         # the final add-mul nodes are absorbed into TopK
         absorb.AbsorbScalarMulAddIntoTopK,
         InferDataLayouts,
         RemoveUnusedTensors,
     ]
     for make_pass in passes:
         model = model.transform(make_pass())

     target_chkpt = get_checkpoint_name(topology, wbits, abits, "streamline")
     model.save(target_chkpt)
Exemplo n.º 11
0
def step_resnet50_convert_to_hls(model: ModelWrapper,
                                 cfg: DataflowBuildConfig):
    """Apply the ResNet-50 specific HLS conversion transformations.

    The datatype of the first graph input is set to UINT8. If the optional
    ``InferDoublePackedConv`` transformation can be imported it is applied;
    otherwise a notice is printed and the step continues without it. The
    ``to_hls`` transformations are then applied one after another, each
    followed by layout inference, node renaming and datatype inference, and
    the graph is finally tidied up.

    :param model: model to convert; its input datatype annotation is set
        on the passed object before the first transformation
    :param cfg: build configuration (not used by this step)
    :return: the transformed model
    """
    model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"])
    model = model.transform(InferDataLayouts())

    # Only a missing optional module selects the fallback. Errors raised by
    # the transformation itself propagate instead of being reported as
    # "not available".
    try:
        from finn.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv
    except ImportError:
        print(
            " FINN Experimental not available. Using non-packed convolution ")
    else:
        model = model.transform(InferDoublePackedConv([1]))

    model = model.transform(DoubleToSingleFloat())
    model = model.transform(InferDataTypes())
    model = model.transform(SortGraph())

    to_hls_transformations = [
        to_hls.InferAddStreamsLayer, LowerConvsToMatMul,
        to_hls.InferChannelwiseLinearLayer, to_hls.InferPool_Batch,
        AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds,
        to_hls.InferQuantizedStreamingFCLayer, to_hls.InferThresholdingLayer,
        AbsorbConsecutiveTransposes, to_hls.InferConvInpGen,
        to_hls.InferDuplicateStreamsLayer, to_hls.InferLabelSelectLayer
    ]
    for trn in to_hls_transformations:
        model = model.transform(trn())
        # layouts, node names and datatypes are refreshed after every
        # conversion
        model = model.transform(InferDataLayouts())
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(InferDataTypes())

    model = model.transform(RemoveCNVtoFCFlatten())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(RemoveUnusedTensors())
    model = model.transform(SortGraph())

    return model
Exemplo n.º 12
0
    def apply(self, model):
        """Make 3D tensors and the attributes of supported nodes 4D-compatible.

        If `_find_invalid_nodes` reports any node, a warning is issued and
        the model is returned untouched. Otherwise shapes are inferred,
        unused tensors are removed, the attributes of Transpose, ArgMax,
        LogSoftMax, TopK, Flatten, Conv and MaxPool nodes are adjusted in
        place, and every collected tensor shape with three dimensions is
        extended with a trailing dimension of size 1.

        Returns the tuple ``(model, graph_modified)``; ``graph_modified`` is
        never set to True in this method, so the flag is always False.
        """
        graph_modified = False

        invalid_nodes = _find_invalid_nodes(model)
        if len(invalid_nodes) > 0:
            warnings.warn("Transformation is not applied,\
                 found unsupported nodes in the graph: {}.".format(
                invalid_nodes))
            return (model, graph_modified)

        # Infer the shape of each tensor and remove unused tensors
        model = model.transform(InferShapes())
        model = model.transform(RemoveUnusedTensors())

        # This list contains all nodes with initializers that need to be converted
        nodes_with_initializers = ["Mul", "Conv", "Add", "Div", "Reshape"]
        # Obtain a list of initializer names (used to filter out only value infos)
        initializers_names = [x.name for x in model.graph.initializer]

        # Maps tensor name -> [element type, shape]. The dictionary is built
        # by successive merges, so an entry added later replaces an earlier
        # entry with the same name.
        all_tensors = {}
        # Extract the inputs
        all_tensors = {
            **all_tensors,
            **{
                x.name: [
                    x.type.tensor_type.elem_type,
                    model.get_tensor_shape(x.name)
                ]
                for x in model.graph.input
            },
        }
        # Extract the value_info entries that are not initializers
        all_tensors = {
            **all_tensors,
            **{
                x.name: [
                    x.type.tensor_type.elem_type,
                    model.get_tensor_shape(x.name)
                ]
                for x in model.graph.value_info if x.name not in initializers_names
            },
        }
        # Extract only initializers from nodes that are relevant for conversion
        # (only the first consumer of each initializer is inspected)
        all_tensors = {
            **all_tensors,
            **{
                x.name: [x.data_type, x.dims]
                for x in model.graph.initializer if model.find_consumers(x.name)[0].op_type in nodes_with_initializers
            },
        }
        # Extract the outputs
        all_tensors = {
            **all_tensors,
            **{
                x.name: [
                    x.type.tensor_type.elem_type,
                    model.get_tensor_shape(x.name)
                ]
                for x in model.graph.output
            },
        }

        # The list below contains tensor names that are the output of nodes that
        # reduce the tensor's dimension. The shape of these tensors also needs
        # to be extended
        tensors_reduced_dimension = []
        for n in model.graph.node:
            node_op_type = n.op_type
            # shape of the node's first input, taken before any shape is changed
            input_shape = model.get_tensor_shape(n.input[0])
            # Find tensors that are the output of nodes that reduce the dimension
            if node_op_type == "ArgMax":
                keep_dims = get_by_name(n.attribute, "keepdims", "name").i
                if len(input_shape) == 3 and keep_dims == 0:
                    node_out = n.output
                    for n_o in node_out:
                        tensors_reduced_dimension.append(n_o)
            # Each node from the list of supported nodes is made compatible
            # with 4D tensors
            if node_op_type == "Transpose":
                perm = get_by_name(n.attribute, "perm", "name").ints
                if (
                        len(perm) == 3
                ):  # Meaning that the transpose operation was on a 3D tensor
                    perm.append(3)  # append 4th dimension
            elif node_op_type in ["ArgMax", "LogSoftMax", "TopK", "Flatten"]:
                axis = get_by_name(n.attribute, "axis", "name")
                if len(input_shape) == 3 and axis.i < 0:
                    axis.i = 3 + axis.i  # count dimensions from the front
            elif node_op_type == "Conv":
                dilations = get_by_name(n.attribute, "dilations", "name").ints
                kernel_shape = get_by_name(n.attribute, "kernel_shape",
                                           "name").ints
                pads = get_by_name(n.attribute, "pads", "name").ints
                strides = get_by_name(n.attribute, "strides", "name").ints
                if len(dilations) == 1:  # we must add another dimension to it
                    dilations.append(
                        1
                    )  # only equal dilation value along each spatial axis is supported
                if len(kernel_shape
                       ) == 1:  # we must add another dimension to it
                    kernel_shape.append(1)
                if (
                        len(pads) == 2
                ):  # pads = [x1_begin, x1_end] --> [x1_begin, x2_begin, x1_end, x2_end]
                    pads.insert(1, 0)
                    pads.append(0)
                if len(strides) == 1:  # strides = [stride_h, stride_w]
                    strides.append(1)
            elif node_op_type == "MaxPool":
                kernel_shape = get_by_name(n.attribute, "kernel_shape",
                                           "name").ints
                pads = get_by_name(n.attribute, "pads", "name").ints
                strides = get_by_name(n.attribute, "strides", "name").ints
                if len(kernel_shape
                       ) == 1:  # we must add another dimension to it
                    kernel_shape.append(1)
                if (
                        len(pads) == 2
                ):  # pads = [x1_begin, x1_end] --> [x1_begin, x2_begin, x1_end, x2_end]
                    pads.insert(1, 0)
                    pads.append(0)
                if len(strides) == 1:  # strides = [stride_h, stride_w]
                    strides.append(1)

        # Change format of each input/value_info/output tensor
        for k, v in all_tensors.items():
            tensor_type = v[0]
            shape = v[1]
            # Add extra dimension for tensors that either:
            # 1) Have 3 dimensions ( (N,C,H) -> (N,C,H,1) )
            # 2) Come after operations that reduce their dimension: e.g. {Argmax, ...}
            # NOTE(review): for initializer entries `shape` is the `dims` field
            # collected above, so the append below acts on that object
            # directly - confirm that this in-place change is intended.
            if len(shape) == 3 or k in tensors_reduced_dimension:
                shape.append(1)
                model.set_tensor_shape(k, shape, tensor_type)

        return (model, graph_modified)
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise,
                                           use_reshape):
    """Check the HLS conversion of a convolution followed by a FC layer.

    A model consisting of Conv -> MultiThreshold -> Flatten/Reshape ->
    MatMul -> MultiThreshold is built, streamlined, converted to HLS layers
    and prepared for cppsim. The converted model must produce the same
    result as the original model and must contain exactly one Transpose
    node and neither Flatten nor Reshape nodes.

    :param conv_config: tuple ``(input_shape, kernel_shape, stride, pad)``,
        each element being a ``(height, width)`` pair
    :param depthwise: if True, a depthwise convolution is built (group and
        output channel count equal the input channel count)
    :param use_reshape: if set, a Reshape node is used between the two
        layers, otherwise a Flatten node with ``axis=1``
    """
    # NOTE(review): the global seed is fixed here and random tensors are
    # generated further down, so the order of those statements presumably
    # determines the generated data - confirm before reordering anything.
    np.random.seed(0)
    idt = DataType["UINT4"]
    odt = DataType["UINT4"]
    conv_weight_dt = DataType["INT4"]
    fc_weight_dt = DataType["INT4"]

    # unpack the parametrized convolution settings
    input_shape, kernel_shape, stride, pad = conv_config
    kernel_size_h, kernel_size_w = kernel_shape
    input_size_h, input_size_w = input_shape
    stride_h, stride_w = stride
    pad_h, pad_w = pad

    in_chn = 4
    fc_filters = 16

    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w]
    else:
        group = 1
        out_chn = 8
        conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w]

    output_size_h = compute_conv_output_dim(input_size_h, kernel_size_h,
                                            stride_h, 2 * pad_h)
    output_size_w = compute_conv_output_dim(input_size_w, kernel_size_w,
                                            stride_w, 2 * pad_w)

    # input_shape is rebound from the (height, width) pair to the full shape
    input_shape = [1, in_chn, input_size_h, input_size_w]
    fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters]
    output_shape = [1, fc_filters]

    # conv_config is rebound from the test parameter to the attributes of
    # the Conv node
    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w]
    conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w]
    conv_config["strides"] = [stride_h, stride_w]

    global_in = helper.make_tensor_value_info("global_in", TensorProto.FLOAT,
                                              input_shape)
    global_out = helper.make_tensor_value_info("global_out", TensorProto.FLOAT,
                                               output_shape)
    # value_info entries for the parameter tensors of the graph
    value_info = [
        helper.make_tensor_value_info("conv_param", TensorProto.FLOAT,
                                      conv_param_shape),
        helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT,
                                      (out_chn, 15)),
        helper.make_tensor_value_info("matmul_param", TensorProto.FLOAT,
                                      fc_param_shape),
        helper.make_tensor_value_info("thres2_param", TensorProto.FLOAT,
                                      (fc_filters, 15)),
        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
    ]

    # node that connects the convolutional part to the fully connected part
    if use_reshape:
        flatten_node = helper.make_node("Reshape",
                                        ["thres1_out", "reshape_shape"],
                                        ["flatten_out"])
    else:
        flatten_node = helper.make_node("Flatten", ["thres1_out"],
                                        ["flatten_out"],
                                        axis=1)

    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[global_in],
            outputs=[global_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["global_in", "conv_param"],
                                 ["conv_out"], **conv_config),
                helper.make_node(
                    "MultiThreshold",
                    ["conv_out", "thres1_param"],
                    ["thres1_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
                flatten_node,
                helper.make_node("MatMul", ["flatten_out", "matmul_param"],
                                 ["matmul_out"]),
                helper.make_node(
                    "MultiThreshold",
                    ["matmul_out", "thres2_param"],
                    ["global_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
            ],
        ))

    # annotate datatypes and the input layout
    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("global_in", idt)
    model.set_tensor_layout("global_in", DataLayout.NCHW)
    model.set_tensor_datatype("global_out", odt)
    model.set_tensor_datatype("conv_param", conv_weight_dt)
    model.set_tensor_datatype("matmul_param", fc_weight_dt)
    model.set_tensor_datatype("thres1_param", DataType["INT32"])
    model.set_tensor_datatype("thres2_param", DataType["INT32"])

    # fill in the parameter tensors
    model.set_initializer("conv_param",
                          gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))
    model.set_initializer("thres1_param",
                          get_multithreshold_rand_params(out_chn, 15, seed=0))
    model.set_initializer(
        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0))
    model.set_initializer("matmul_param",
                          gen_finn_dt_tensor(fc_weight_dt, fc_param_shape))
    model.set_initializer("reshape_shape", np.array([1, -1]))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())

    # streamlining
    # (`model` is kept unchanged as the reference for the comparison below)
    new_model = model.transform(MoveScalarLinearPastInvariants())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(InferDataLayouts())
    new_model = new_model.transform(RemoveUnusedTensors())

    # convert_to_hls
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
    new_model = new_model.transform(to_hls.InferThresholdingLayer())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    new_model = new_model.transform(to_hls.InferStreamingMaxPool())
    new_model = new_model.transform(RemoveCNVtoFCFlatten())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferDataLayouts())

    # prepare cppsim
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    new_model = new_model.transform(SetExecMode("cppsim"))

    # check for correct execution
    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)

    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))

    # check if transpose->flatten was removed
    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0