Exemplo n.º 1
0
def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
    """
    Depending on the auto_fifo_depths setting, do one of the following:
    * if auto_fifo_depths=True:  Run the `InsertAndSetFIFODepths` transformation
    to attempt to determine the FIFO sizes that provide full throughput. Involves
    running stitched-IP rtlsim and may take a long time.
    * if auto_fifo_depths=False:  Assume the folding config file contains FIFO
    sizes as well. Runs the `InsertFIFO` transformation, then
    `ApplyConfig(cfg.folding_config_file)`, and finally `RemoveShallowFIFOs`.
    Coherency with config file node naming is ensured by calling
    `GiveUniqueNodeNames`.
    """

    if cfg.auto_fifo_depths:
        model = model.transform(
            InsertAndSetFIFODepths(
                cfg._resolve_fpga_part(),
                cfg._resolve_hls_clk_period(),
                vivado_ram_style=cfg.large_fifo_mem_style.value,
            ))
    else:
        # assume folding cfg json contains FIFO sizes too
        # insert DWCs, FIFOs and run ApplyConfig once more
        model = model.transform(InsertDWC())
        # need to make sure all FIFOs are created so that their depth can be
        # set by ApplyConfig, so create_shallow_fifos=True
        model = model.transform(InsertFIFO(create_shallow_fifos=True))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(GiveReadableTensorNames())
        if cfg.folding_config_file is not None:
            model = model.transform(ApplyConfig(cfg.folding_config_file))
        # remove any shallow FIFOs
        model = model.transform(RemoveShallowFIFOs())

    # extract the final configuration and save it as json
    hw_attrs = [
        "PE",
        "SIMD",
        "ram_style",
        "depth",
        "impl_style",
        "resType",
        "mem_mode",
        "runtime_writeable_weights",
    ]
    extract_model_config_to_json(model,
                                 cfg.output_dir + "/final_hw_config.json",
                                 hw_attrs)

    # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again
    # this will only run for the new nodes (e.g. FIFOs and DWCs)
    model = model.transform(
        PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
    model = model.transform(HLSSynthIP())
    return model
Exemplo n.º 2
0
def test_res_estimate():
    mw = mh = 4
    simd = 1
    pe = 1
    idt = DataType.INT2
    wdt = DataType.INT2
    odt = DataType.INT32
    actval = odt.min()

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])
    node_inp_list = ["inp", "weights", "thresh"]

    FCLayer_node = helper.make_node(
        "StreamingFCLayer_Batch",
        node_inp_list,
        ["outp"],
        domain="finn",
        backend="fpgadataflow",
        resType="ap_resource_lut()",
        MW=mw,
        MH=mh,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        ActVal=actval,
        binaryXnorMode=0,
        noActivation=0,
    )
    graph = helper.make_graph(nodes=[FCLayer_node],
                              name="fclayer_graph",
                              inputs=[inp],
                              outputs=[outp])

    model = helper.make_model(graph, producer_name="fclayer-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("weights", wdt)

    model = model.transform(GiveUniqueNodeNames())
    prod_resource_estimation = model.analysis(res_estimation)
    expect_resource_estimation = {
        "StreamingFCLayer_Batch_0": {
            "BRAM_18K": 1,
            "LUT": 304.4
        }
    }

    assert check_two_dict_for_equality(
        prod_resource_estimation,
        expect_resource_estimation), """The produced output of
Exemplo n.º 3
0
    def apply(self, model):
        # first infer layouts
        model = model.transform(InferDataLayouts())
        # prepare at global level, then break up into kernels
        prep_transforms = [
            InsertIODMA(64),
            InsertDWC(),
            Floorplan(),
            CreateDataflowPartition(),
        ]
        for trn in prep_transforms:
            model = model.transform(trn)
            model = model.transform(GiveUniqueNodeNames())
            model = model.transform(GiveReadableTensorNames())
        # Build each kernel individually
        sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
        for sdp_node in sdp_nodes:
            prefix = sdp_node.name + "_"
            sdp_node = getCustomOp(sdp_node)
            dataflow_model_filename = sdp_node.get_nodeattr("model")
            kernel_model = ModelWrapper(dataflow_model_filename)
            kernel_model = kernel_model.transform(InsertFIFO())
            kernel_model = kernel_model.transform(GiveUniqueNodeNames(prefix))
            kernel_model.save(dataflow_model_filename)
            kernel_model = kernel_model.transform(
                PrepareIP(self.fpga_part, self.period_ns))
            kernel_model = kernel_model.transform(HLSSynthIP())
            kernel_model = kernel_model.transform(
                CreateStitchedIP(self.fpga_part, self.period_ns,
                                 sdp_node.onnx_node.name, True))
            kernel_model.set_metadata_prop("platform", "zynq-iodma")
            kernel_model.save(dataflow_model_filename)
        # Assemble design from IPs
        model = model.transform(
            MakeZYNQProject(self.platform, enable_debug=self.enable_debug))

        # set platform attribute for correct remote execution
        model.set_metadata_prop("platform", "zynq-iodma")

        # create driver
        model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
        return (model, False)
Exemplo n.º 4
0
def test_end2end_mobilenet_lowering():
    model = load_test_checkpoint_or_skip(build_dir +
                                         "/end2end_mobilenet_streamlined.onnx")
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(absorb.AbsorbConsecutiveTransposes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RoundAndClipThresholds())
    model.save(build_dir + "/end2end_mobilenet_lowered.onnx")
Exemplo n.º 5
0
 def test_import_and_tidy(self, topology, wbits, abits):
     prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
     model = load_test_checkpoint_or_skip(prev_chkpt_name)
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(GiveReadableTensorNames())
     model = model.transform(InferDataTypes())
     model = model.transform(RemoveStaticGraphInputs())
     chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
     model.save(chkpt)
Exemplo n.º 6
0
def tidy_up(model):
    log("Basic transformations launched")
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    log("Basic transformations completed")
    save(model, "0_tidy")
    return model
Exemplo n.º 7
0
 def test_ipgen(self, topology, wbits, abits, kind):
     if kind == "alveo" and ("VITIS_PATH" not in os.environ):
         pytest.skip("VITIS_PATH not set")
     prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
     model = load_test_checkpoint_or_skip(prev_chkpt_name)
     test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model.save(get_checkpoint_name(topology, wbits, abits,
                                    "ipgen_" + kind))
Exemplo n.º 8
0
def test_end2end_mobilenet_convert_to_hls_layers():
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_lowered.onnx")
    model = model.transform(to_hls.InferPool_Batch())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferVVAU())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model.save(build_dir + "/end2end_mobilenet_hls_layers.onnx")
Exemplo n.º 9
0
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt,
                                               pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim, odim, num_ch)
    assert y_produced.shape == expected_oshape

    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
        if pad % 2 == 0:
            pad_up = pad // 2
            pad_left = pad // 2
        else:
            pad_up = pad // 2 + 1
            pad_left = pad // 2 + 1
    else:
        pad_up = pad // 2
        pad_left = pad // 2

    pad_down = pad - pad_up
    pad_right = pad - pad_left

    y_expected = np.pad(x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right),
                            (0, 0)), "constant")

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 10
0
def test_end2end_mobilenet_export():
    # export preprocessing
    preproc_onnx = build_dir + "/end2end_mobilenet_preproc.onnx"
    mean = [0.485, 0.456, 0.406]
    std = 0.226
    ch = 3
    preproc = NormalizePreProc(mean, std, ch)
    bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
    preproc_model = ModelWrapper(preproc_onnx)
    # set input finn datatype to UINT8
    preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name,
                                      DataType["UINT8"])
    preproc_model = preproc_model.transform(InferShapes())
    preproc_model = preproc_model.transform(FoldConstants())
    preproc_model = preproc_model.transform(GiveUniqueNodeNames())
    preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
    preproc_model = preproc_model.transform(GiveReadableTensorNames())
    preproc_model.save(build_dir + "/end2end_mobilenet_preproc.onnx")

    # export mobilenet
    finn_onnx = build_dir + "/end2end_mobilenet_export.onnx"
    mobilenet = get_test_model_trained("mobilenet", 4, 4)
    bo.export_finn_onnx(mobilenet, (1, 3, 224, 224), finn_onnx)

    # calculate golden output with pytorch/brevitas and save as .npy
    # get single image as input and prepare image
    img = Image.open("/workspace/finn/tests/brevitas/king_charles.jpg")
    # resize smallest side of the image to 256 pixels and resize larger side
    # with same ratio
    img = resize_smaller_side(256, img)
    # crop central 224*224 window
    img = crop_center(224, img)
    # save image as numpy array and as torch tensor to enable testing in
    # brevitas/pytorch and finn and transpose from (H, W, C) to (C, H, W)
    img_np = np.asarray(img).copy().astype(np.float32).transpose(2, 0, 1)
    img_np = img_np.reshape(1, 3, 224, 224)
    np.save(build_dir + "/end2end_mobilenet_input.npy", img_np)
    img_torch = torch.from_numpy(img_np).float()
    # do forward pass in PyTorch/Brevitas
    input_tensor = preproc.forward(img_torch)
    golden = mobilenet.forward(input_tensor).detach().numpy()
    golden_topk = golden.flatten()
    golden_top5 = np.argsort(golden_topk)[-5:]
    golden_top5 = np.flip(golden_top5)
    golden_top5_prob = []
    for index in golden_top5:
        golden_top5_prob.append(golden_topk[index])
    # save golden output values
    np.save(build_dir + "/end2end_mobilenet_golden_top5.npy", golden_top5)
    np.save(build_dir + "/end2end_mobilenet_golden_top5_prob.npy",
            golden_top5_prob)
    assert os.path.isfile(finn_onnx)
    assert os.path.isfile(build_dir + "/end2end_mobilenet_preproc.onnx")
Exemplo n.º 11
0
def step_mobilenet_convert_to_hls_layers(model: ModelWrapper,
                                         cfg: DataflowBuildConfig):
    mem_mode = cfg.default_mem_mode.value
    model = model.transform(to_hls.InferPool_Batch())
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferVVAU())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    return model
Exemplo n.º 12
0
def post_processing(model):
    log("Starting Post Processing")
    # Insert Top-1 node at the end
    model = model.transform(InsertTopK(k=1))
    # Tidy-up again
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    log("Finished Post Processing!")
    save(model, "2_with_pre_post")
    return model
Exemplo n.º 13
0
def inference_cost(model_filename,
                   *,
                   output_json=None,
                   output_onnx=None,
                   preprocess=True,
                   discount_sparsity=True):
    """Print the inference cost estimate metric for given ONNX model.
    Supports the Quant op for weight/activation quantization.

    :param model_filename: Filename for ONNX model
    :param output_json: Optional JSON filename to save the inference cost dict
    :param output_onnx: Optional ONNX filename to save the final model after any
        preprocessing
    :param preprocess: If set, run preprocessing steps such as shape inference,
        datatype inference and constant folding. Strongly recommended.
    :param discount_sparsity: If set, will discount op cost of MAC ops with a
        constant zero weight, and the mem cost of constant zero weights.
    """
    print("Inference cost for " + model_filename)
    model = ModelWrapper(model_filename)
    if preprocess:
        qnt_nodes = model.get_nodes_by_op_type("Quant")
        for qnt_node in qnt_nodes:
            qnt_node.domain = "finn.custom_op.general"
        model = model.transform(InferShapes())
        model = model.transform(GiveUniqueParameterTensors())
        model = model.transform(InferDataTypes())
        model = model.transform(FoldConstants())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(RemoveStaticGraphInputs())
        model = model.transform(InferDataTypes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    if output_onnx is not None:
        model.save(output_onnx)
    ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
    bops = compute_bops(ret)
    mem_w_bits = compute_mem_bits(ret, "mem_w")
    mem_o_bits = compute_mem_bits(ret, "mem_o")
    ret["total_bops"] = bops
    ret["total_mem_w_bits"] = mem_w_bits
    ret["total_mem_o_bits"] = mem_o_bits

    if "unsupported" in ret:
        ret["unsupported"] = str(ret["unsupported"])
    print(json.dumps(ret, sort_keys=True, indent=2))

    if output_json is not None:
        with open(output_json, "w") as f:
            json.dump(ret, f, sort_keys=True, indent=2)
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 15
0
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Apply the folding configuration file onto the model to set folding (parallelization)
    and other attributes, if config file is specified."""

    if cfg.folding_config_file is not None:
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(ApplyConfig(cfg.folding_config_file))

    if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
        # prepare cppsim
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
        verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model
Exemplo n.º 16
0
def test_topk_insert(k):
    tfc = get_test_model_trained("TFC", 1, 1)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)

    # do transformations (no topk)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())

    # verification: generate random input, run through net, streamline,
    # run again, check that output is top-k
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()
    output_golden = tfc.forward(input_brevitas).detach().numpy()
    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
    output_golden_topk = output_golden_topk.flatten()

    input_dict = {"global_in": nph.to_array(input_tensor)}

    # insert top-k
    model = model.transform(InsertTopK(k))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferShapes())

    # verify output of top-k
    output_dict_topk = oxe.execute_onnx(model, input_dict)
    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
    output_pysim_topk = output_pysim_topk.astype(np.int).flatten()

    assert np.array_equal(output_golden_topk, output_pysim_topk)
    os.remove(export_onnx_path)
Exemplo n.º 17
0
def test_renaming():
    # load the onnx model
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # do some basic checks
    assert model.graph.input[0].name == "global_in"
    assert model.graph.output[0].name == "global_out"
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # ensure running renaming twice still yields the same names
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    assert model.graph.node[1].op_type == "Conv"
    assert model.graph.node[1].name == "Conv_0"
    assert model.graph.node[1].input[1] == "Conv_0_param0"
    assert model.graph.node[6].op_type == "Add"
    assert model.graph.node[6].name == "Add_1"
    assert model.graph.node[6].input[1] == "Add_1_param0"
    # run renamed model to make sure we did not mess up the topology
    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    raw_o = get_data("finn.data",
                     "onnx/mnist-conv/test_data_set_0/output_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    output_tensor = onnx.load_tensor_from_string(raw_o)
    input_dict = {"global_in": np_helper.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    assert np.isclose(np_helper.to_array(output_tensor),
                      output_dict["global_out"],
                      atol=1e-3).all()
Exemplo n.º 18
0
def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
    model = create_one_fc_model()
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
        model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(CodeGen_ipgen(test_fpga_part, 5))
    model = model.transform(HLSSynth_IPGen())
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")
Exemplo n.º 19
0
def test_fpgadataflow_ipstitch_gen_model(mem_mode):
    model = create_one_fc_model(mem_mode)
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
        model.set_metadata_prop("exec_mode", "remote_pynq")
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, 5))
    model = model.transform(HLSSynthIP())
    assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
    assert model.graph.node[-1].op_type == "TLastMarker"
    model.save(ip_stitch_model_dir +
               "/test_fpgadataflow_ipstitch_gen_model_%s.onnx" % mem_mode)
Exemplo n.º 20
0
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Run the tidy-up step on given model. This includes shape and datatype
    inference, constant folding, and giving nodes and tensors better names.
    """

    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())

    if VerificationStepType.TIDY_UP_PYTHON in cfg._resolve_verification_steps():
        verify_step(model, cfg, "initial_python", need_parent=False)

    return model
Exemplo n.º 21
0
def test_infer_datatypes():
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    # this model has no DataType info, so add some DataType annotation
    # to make things a bit more exciting
    model.set_tensor_datatype("global_in", DataType["UINT8"])
    # Conv with int weights + inputs will have int output datatype
    model.set_tensor_datatype("Conv_0_param0", DataType["INT4"])
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("global_in") == DataType["UINT8"]
    assert model.get_tensor_datatype("Conv_0_out0") == DataType["INT32"]
    assert model.get_tensor_datatype("Relu_0_out0") == DataType["FLOAT32"]
    assert model.get_tensor_datatype("global_out") == DataType["FLOAT32"]
Exemplo n.º 22
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (
        y == x
    ).all(), """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""

    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(CreateStitchedIP(test_fpga_part))
    model = model.transform(MakePYNQProject(test_pynq_board))
    model = model.transform(SynthPYNQProject())
    model = model.transform(MakePYNQDriver())
    ip = os.environ["PYNQ_IP"]
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))

    res = throughput_test(model)
    expected_dict = {}
    expected_dict["runtime[ms]"] = []
    expected_dict["throughput[images/s]"] = []
    expected_dict["DRAM_in_bandwidth[Mb/s]"] = []
    expected_dict["DRAM_out_bandwidth[Mb/s]"] = []
    for key in expected_dict:
        assert (
            key in res
        ), """Throughput test not successful, no value for {}
        in result dictionary""".format(
            key
        )
Exemplo n.º 23
0
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = max(1, ch // fold)
    assert ch % pe == 0

    # generate input data
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data
    input_dict = prepare_inputs(x1, x2)

    oshape = model.get_tensor_shape("outp")
    y = x1 + x2
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 24
0
def test_move_flatten_past_affine(data_layout, batch_size):
    if data_layout == DataLayout.NHWC:
        ishape = [batch_size, 1, 1, 1024]
        oshape = [batch_size, 1024]
    else:
        ishape = [batch_size, 1024, 1, 1]
        oshape = [batch_size, 1024]

    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)

    flatten_node = helper.make_node("Flatten", ["inp"], ["outp"])

    graph = helper.make_graph(
        nodes=[flatten_node],
        name="move-flatten-graph",
        inputs=[inp],
        outputs=[outp],
    )

    model = helper.make_model(graph, producer_name="move_flatten_model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", DataType.INT2)
    model.set_tensor_layout("inp", data_layout)
    model = model.transform(InsertTopK())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())

    # compare execution before and after transformation
    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
    idict = {model.graph.input[0].name: inp_values}
    model_transformed = model.transform(MoveFlattenPastTopK())
    assert oxe.compare_execution(model, model_transformed, idict)

    # depending on data layout check if graph is transformed or not
    if data_layout == DataLayout.NHWC:
        # check if nodes have new order in transformed graph
        assert model.graph != model_transformed.graph
        assert model_transformed.graph.node[-1].op_type == "Flatten"
    else:
        assert model.graph == model_transformed.graph
Exemplo n.º 25
0
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_accpool_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    expected_y = np.sum(x, axis=(1, 2)).flatten()

    assert (y == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # commented out, needs performance debug:
        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
        # assert False where False =
        # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
        assert exp_cycles != 0
        assert cycles_rtlsim != 0
Exemplo n.º 26
0
def test_infer_datatypes():
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    assert model.get_tensor_datatype("MatMul_0_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_1_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_2_out0") == DataType.INT32
    assert model.get_tensor_datatype("MatMul_3_out0") == DataType.INT32
    assert model.get_tensor_datatype("Sign_0_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_1_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_2_out0") == DataType.BIPOLAR
    assert model.get_tensor_datatype("Sign_3_out0") == DataType.BIPOLAR
    os.remove(export_onnx_path)
Exemplo n.º 27
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape,
                                          finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y == x).all(), """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Exemplo n.º 28
0
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (y == x).all(), """The output values are not the same as the
        input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Exemplo n.º 29
0
 def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
     prev_chkpt_name = get_checkpoint_name(
         topology, wbits, abits, "fifodepth_" + kind
     )
     model = load_test_checkpoint_or_skip(prev_chkpt_name)
     test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
     model = model.transform(InsertDWC())
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(AnnotateCycles())
     perf = model.analysis(dataflow_performance)
     latency = perf["critical_path_cycles"]
     # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
     for fifo_layer in model.get_nodes_by_op_type("StreamingFIFO"):
         getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model = model.transform(PrepareRTLSim())
     model.set_metadata_prop("exec_mode", "rtlsim")
     os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1))
     if rtlsim_trace:
         model.set_metadata_prop(
             "rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)
         )
         os.environ["RTLSIM_TRACE_DEPTH"] = "3"
     rtlsim_chkpt = get_checkpoint_name(
         topology, wbits, abits, "ipstitch_rtlsim_" + kind
     )
     model.save(rtlsim_chkpt)
     parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
     (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
         topology, wbits, abits, return_topk=1
     )
     y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
     model = ModelWrapper(rtlsim_chkpt)
     perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
     # warnings.warn("Estimated & rtlsim performance: " + str(perf))
     # for (k, v) in perf.items():
     #    update_dashboard_data(topology, wbits, abits, k, v)
     update_dashboard_data(
         topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
     )
     assert np.isclose(y, output_tensor_npy).all()
Exemplo n.º 30
0
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    output_dict = oxe.execute_onnx(model, input_dict)
    y0 = output_dict["outp0"]
    y1 = output_dict["outp1"]
    expected_y = x

    assert (y0 == expected_y).all(), exec_mode + " failed"
    assert (y1 == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0