Exemplo n.º 1
0
def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Create stitched IP for a graph after all HLS IP blocks have been generated.
    Depends on the DataflowOutputType.STITCHED_IP output product."""

    if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
        stitched_ip_dir = cfg.output_dir + "/stitched_ip"
        model = model.transform(
            CreateStitchedIP(cfg._resolve_fpga_part(),
                             cfg.synth_clk_period_ns))
        # TODO copy all ip sources into output dir? as zip?
        copytree(model.get_metadata_prop("vivado_stitch_proj"),
                 stitched_ip_dir)
        print("Vivado stitched IP written into " + stitched_ip_dir)
    if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps(
    ):
        # prepare ip-stitched rtlsim
        verify_model = deepcopy(model)
        # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
        for fifo_layer in verify_model.get_nodes_by_op_type("StreamingFIFO"):
            getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
        # similarly for StreamingDataWidthConverter with impl_style=hls
        for dwc_layer in verify_model.get_nodes_by_op_type(
                "StreamingDataWidthConverter_Batch"):
            getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
        verify_model = verify_model.transform(PrepareRTLSim())
        verify_model.set_metadata_prop("exec_mode", "rtlsim")
        verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
    return model
Exemplo n.º 2
0
def test_end2end_cnv_w1a1_verify_dataflow_part():
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
    x = np.zeros((1, 32, 32, 3), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
Exemplo n.º 3
0
def test_end2end_mobilenet_rtlsim():
    model = load_test_checkpoint_or_skip(build_dir +
                                         "/end2end_mobilenet_ipgen.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_mobilenet_ipgen_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode.npy",
        res_rtlsim_nodebynode,
    )
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    res_rtlsim_nodebynode_prob = (
        ret_rtlsim_nodebynode[model.graph.node[-2].output[0]] * a0)
    np.save(
        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode_prob.npy",
        res_rtlsim_nodebynode_prob,
    )

    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir +
                          "/end2end_mobilenet_golden_top5_prob.npy")

    assert (golden == res_rtlsim_nodebynode).all()
    assert np.isclose(golden_prob, res_rtlsim_nodebynode_prob).all()
Exemplo n.º 4
0
def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4

    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride,
                                   padding)

    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}

    new_model = model.transform(InferConvInpGen())
    new_model = new_model.transform(InferVVAU())

    # set SIMD in ConvInputGen node and PE in VVAU node

    for n in new_model.graph.node:
        if n.op_type == "ConvolutionInputGenerator":
            convinputgen_node = getCustomOp(n)
            convinputgen_node.set_nodeattr("SIMD", pe)
        elif n.op_type == "Vector_Vector_Activate_Batch":
            vvau_node = getCustomOp(n)
            vvau_node.set_nodeattr("PE", pe)

    new_model = new_model.transform(SetExecMode("rtlsim"))
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
    new_model = new_model.transform(HLSSynthIP())
    new_model = new_model.transform(PrepareRTLSim())

    assert oxe.compare_execution(model, new_model, input_dict)
Exemplo n.º 5
0
def test_fpgadataflow_packed_dsp(ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode):
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    assert (len(cdp_model.graph.node) == 3 and
            cdp_model.graph.node[1].op_type == "ConvDoublePacked_Batch" and
            cdp_model.graph.node[0].op_type == "Transpose" and
            cdp_model.graph.node[-1].op_type == "Transpose"), "Incorrect model"
    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]

    assert (y_produced.flatten() == y_expected.flatten()).all(), "cppsim failed"
Exemplo n.º 6
0
def test_end2end_tfc_w1a2_verify_dataflow_part():
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    x = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
Exemplo n.º 7
0
def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch,
                                       exec_mode):
    ifm_dim_h = ifm_dim
    k_h = k
    if dim_1d:
        ifm_dim_w = 1
        k_w = 1
    else:
        ifm_dim_w = ifm_dim_h
        k_w = k_h
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)

    stride_h = k_h
    stride_w = k_w
    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
    ofm_dim = (ofm_dim_h, ofm_dim_w)
    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception(
            "Unknown exec_mode in test_layer_streaming_maxpool_batch")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    np.random.seed(0)
    if fold == -1:
        pe = 1
    else:
        pe = labels // fold
    assert labels % pe == 0

    if k == -1:
        k = labels

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))

    model = make_labelselect_modelwrapper(labels, pe, k, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert soft_verify_topk(x, y, k), exec_mode + " failed"
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
Exemplo n.º 10
0
def step_measure_rtlsim_performance(model: ModelWrapper,
                                    cfg: DataflowBuildConfig):
    """Measure performance + latency of stitched-IP model in rtlsim (pyverilator).
    Depends on the DataflowOutputType.STITCHED_IP output product.
    """

    if DataflowOutputType.RTLSIM_PERFORMANCE in cfg.generate_outputs:
        assert (DataflowOutputType.STITCHED_IP
                in cfg.generate_outputs), "rtlsim_perf needs stitched IP"
        # prepare ip-stitched rtlsim
        rtlsim_model = deepcopy(model)
        # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
        for fifo_layer in rtlsim_model.get_nodes_by_op_type("StreamingFIFO"):
            getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
        # similarly for StreamingDataWidthConverter with impl_style=hls
        for dwc_layer in rtlsim_model.get_nodes_by_op_type(
                "StreamingDataWidthConverter_Batch"):
            getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
        rtlsim_model = rtlsim_model.transform(PrepareRTLSim())
        rtlsim_model.set_metadata_prop("exec_mode", "rtlsim")
        # run with single input to get latency
        rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, 1)
        rtlsim_latency = rtlsim_perf_dict["cycles"]
        # run with num inputs equal to layers to fill the whole pipeline
        # to get the steady-state throughput
        rtlsim_bs = len(rtlsim_model.graph.node)
        rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
        rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
        report_dir = cfg.output_dir + "/report"
        os.makedirs(report_dir, exist_ok=True)
        with open(report_dir + "/rtlsim_performance.json", "w") as f:
            json.dump(rtlsim_perf_dict, f, indent=2)

    return model
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs,
                                      exec_mode):
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input and param data
    x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
    # C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32)
    C = gen_finn_dt_tensor(pdt, (ich))

    odt = act

    model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    oshape = model.get_tensor_shape("outp")

    C_reshaped = np.broadcast_to(C.flatten(), x.shape)
    if func == "add":
        y = x + C_reshaped
    elif func == "mul":
        y = x * C_reshaped

    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), "cppsim failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "ChannelwiseOp_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 12
0
def prep_model_for_rtlsim(model, src_dir="/tmp/finn_dev_justin"):
    # Make copies of all IP Cores, so that ReplaceVerilogRelPaths is not a permanent change
    randstring = get_rand_string(10)
    copy_dir = "/tmp/finn_dev_justin/rtlsim_" + randstring
    print(f"Copying IP to folder {copy_dir}")
    model = copy_ip(model, copy_dir, src_dir)
    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    return model
Exemplo n.º 13
0
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    odim = idim + pad

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt,
                                               pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim, odim, num_ch)
    assert y_produced.shape == expected_oshape

    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
        if pad % 2 == 0:
            pad_up = pad // 2
            pad_left = pad // 2
        else:
            pad_up = pad // 2 + 1
            pad_left = pad // 2 + 1
    else:
        pad_up = pad // 2
        pad_left = pad // 2

    pad_down = pad - pad_up
    pad_right = pad - pad_left

    y_expected = np.pad(x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right),
                            (0, 0)), "constant")

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 14
0
def test_convert_to_hls_channelwise_layer(pdt, idt, onnx_op_name, scalar_param,
                                          exec_mode):
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    if scalar_param:
        pshape = (1, )
    else:
        pshape = (1, ifm_ch, 1, 1)

    np.random.seed(0)
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt,
                                             pshape)

    # Since the aren't Data types with a bit width of a non power of 2,
    # there are cases where the input won't use it full range.
    if idt == DataType["INT32"]:
        x = gen_finn_dt_tensor(DataType["INT16"],
                               (1, ifm_ch, ifm_dim, ifm_dim))
    elif idt == DataType["UINT32"]:
        x = gen_finn_dt_tensor(DataType["UINT16"],
                               (1, ifm_ch, ifm_dim, ifm_dim))
    else:
        x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))

    input_dict = prepare_inputs(x)
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]

    new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
    new_model = new_model.transform(GiveUniqueNodeNames())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    ctx_produced = oxe.execute_onnx(new_model,
                                    input_dict,
                                    return_full_exec_context=True)
    y_produced = ctx_produced["outp"]

    assert (y_produced == y_expected).all()
    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    ofm_dim = int(((ifm_dim - k) / stride) + 1)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 16
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(InsertTLastMarker())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (
        y == x
    ).all(), """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""

    model = model.transform(ReplaceVerilogRelPaths())
    model = model.transform(CreateStitchedIP(test_fpga_part))
    model = model.transform(MakePYNQProject(test_pynq_board))
    model = model.transform(SynthPYNQProject())
    model = model.transform(MakePYNQDriver())
    ip = os.environ["PYNQ_IP"]
    username = os.getenv("PYNQ_USERNAME", "xilinx")
    password = os.getenv("PYNQ_PASSWORD", "xilinx")
    port = os.getenv("PYNQ_PORT", 22)
    target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
    model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))

    res = throughput_test(model)
    expected_dict = {}
    expected_dict["runtime[ms]"] = []
    expected_dict["throughput[images/s]"] = []
    expected_dict["DRAM_in_bandwidth[Mb/s]"] = []
    expected_dict["DRAM_out_bandwidth[Mb/s]"] = []
    for key in expected_dict:
        assert (
            key in res
        ), """Throughput test not successful, no value for {}
        in result dictionary""".format(
            key
        )
Exemplo n.º 17
0
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = max(1, ch // fold)
    assert ch % pe == 0

    # generate input data
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))

    model = make_addstreams_modelwrapper(ch, pe, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data
    input_dict = prepare_inputs(x1, x2)

    oshape = model.get_tensor_shape("outp")
    y = x1 + x2
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 18
0
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_accpool_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    expected_y = np.sum(x, axis=(1, 2)).flatten()

    assert (y == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # commented out, needs performance debug:
        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
        # assert False where False =
        # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
        assert exp_cycles != 0
        assert cycles_rtlsim != 0
Exemplo n.º 19
0
def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_fifo_modelwrapper(Shape, depth, folded_shape,
                                          finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y == x).all(), """The output values are not the same as the
       input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Exemplo n.º 20
0
def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):

    # generate input data
    x = gen_finn_dt_tensor(finn_dtype, Shape)
    input_dict = prepare_inputs(x, finn_dtype)

    model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype)

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y = oxe.execute_onnx(model, input_dict)["outp"]

    assert (y == x).all(), """The output values are not the same as the
        input values anymore."""
    assert y.shape == tuple(Shape), """The output shape is incorrect."""
Exemplo n.º 21
0
 def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
     prev_chkpt_name = get_checkpoint_name(
         topology, wbits, abits, "fifodepth_" + kind
     )
     model = load_test_checkpoint_or_skip(prev_chkpt_name)
     test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
     model = model.transform(InsertDWC())
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(AnnotateCycles())
     perf = model.analysis(dataflow_performance)
     latency = perf["critical_path_cycles"]
     # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
     for fifo_layer in model.get_nodes_by_op_type("StreamingFIFO"):
         getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model = model.transform(PrepareRTLSim())
     model.set_metadata_prop("exec_mode", "rtlsim")
     os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1))
     if rtlsim_trace:
         model.set_metadata_prop(
             "rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)
         )
         os.environ["RTLSIM_TRACE_DEPTH"] = "3"
     rtlsim_chkpt = get_checkpoint_name(
         topology, wbits, abits, "ipstitch_rtlsim_" + kind
     )
     model.save(rtlsim_chkpt)
     parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
     (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
         topology, wbits, abits, return_topk=1
     )
     y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
     model = ModelWrapper(rtlsim_chkpt)
     perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
     # warnings.warn("Estimated & rtlsim performance: " + str(perf))
     # for (k, v) in perf.items():
     #    update_dashboard_data(topology, wbits, abits, k, v)
     update_dashboard_data(
         topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
     )
     assert np.isclose(y, output_tensor_npy).all()
Exemplo n.º 22
0
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))

    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    output_dict = oxe.execute_onnx(model, input_dict)
    y0 = output_dict["outp0"]
    y1 = output_dict["outp1"]
    expected_y = x

    assert (y0 == expected_y).all(), exec_mode + " failed"
    assert (y1 == expected_y).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")

    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)

    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim,
                                                  idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim,
                                                      ofm_dim, idt)

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
Exemplo n.º 24
0
def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
    ishape = (1, 10)
    num_embeddings, idt, embedding_dim = embedding_cfg
    eshape = (num_embeddings, embedding_dim)
    exp_oshape = tuple(list(ishape) + [embedding_dim])
    embeddings = gen_finn_dt_tensor(edt, eshape)
    model = make_lookup_model(embeddings, ishape, idt, edt)
    assert len(model.graph.node) == 1
    assert model.graph.node[0].op_type == "Gather"
    iname = model.graph.input[0].name
    ename = model.graph.node[0].input[0]
    oname = model.graph.output[0].name
    assert model.get_tensor_datatype(iname) == idt
    assert model.get_tensor_datatype(ename) == edt
    assert model.get_tensor_datatype(oname) == edt
    assert tuple(model.get_tensor_shape(ename)) == eshape
    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
    assert (model.get_initializer(ename) == embeddings).all()
    itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
    itensor = np.clip(itensor, 0, num_embeddings - 1)
    ret = execute_onnx(model, {iname: itensor})
    exp_out = np.take(embeddings, itensor, axis=0)
    assert (exp_out == ret[oname]).all()
    # call transformation to convert to HLS and verify conversion
    model = model.transform(InferLookupLayer())
    assert model.graph.node[0].op_type == "Lookup"
    assert model.graph.node[0].input[0] == iname
    assert model.graph.node[0].input[1] == ename
    assert model.graph.node[0].output[0] == oname
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    ret_sim = execute_onnx(model, {iname: itensor})
    assert (exp_out == ret_sim[oname]).all()
Exemplo n.º 25
0
def test_runtime_thresholds_single_layer():
    mem_mode = "decoupled"
    act = DataType["INT4"]
    idt = DataType["INT16"]
    nf = 8
    ich = 16
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    in_tensor = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(),
                          idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval,
                                                  mem_mode)
    op_inst = getCustomOp(model.graph.node[0])
    op_inst.set_nodeattr("runtime_writeable_weights", 1)
    op_inst.make_weight_file(T, "decoupled_runtime", "old_weights.dat")
    with open("old_weights.dat", "r") as f:
        old_weight_stream = f.read().strip()
    os.remove("old_weights.dat")
    old_weight_stream = map(lambda x: int(x, 16),
                            old_weight_stream.split("\n"))
    old_weight_stream = list(old_weight_stream)
    # need to create stitched IP for runtime weight testing
    model = model.transform(InsertFIFO(True))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
    model = model.transform(PrepareRTLSim())
    model.set_metadata_prop("exec_mode", "rtlsim")
    # add two copies of the input tensor as the first one is just used to
    # "flush out" the pipeline (as mvau already starts receiving old weights while
    # we read/write new ones and reads seem to cause a disturbance too)
    in_tensor = np.tile(in_tensor, (2, 1))
    exec_ctx = {"inp": in_tensor}
    extracted_weight_stream = []

    def read_weights(sim):
        addr = 0
        for i in range(len(old_weight_stream)):
            extracted_weight_stream.append(
                axilite_read(sim, addr, basename="s_axilite_0_"))
            addr += 4

    rtlsim_exec(model, exec_ctx, pre_hook=read_weights)
    assert extracted_weight_stream == old_weight_stream
    # only use second batch element in output; first will be invalid due to
    # old weights (see above)
    y = exec_ctx["outp"][1]
    expected = multithreshold(in_tensor, T)[1]
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        expected = 2 * expected - 1
    else:
        # signed offset
        expected += act.min()
    assert (y == expected).all()

    new_weights = np.random.randint(idt.min(),
                                    idt.max() + 1,
                                    (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds
    new_weights = np.sort(T, axis=1)
    op_inst.make_weight_file(new_weights, "decoupled_runtime",
                             "new_weights.dat")
    with open("new_weights.dat", "r") as f:
        new_weight_stream = f.read().strip()
    os.remove("new_weights.dat")
    new_weight_stream = map(lambda x: int(x, 16),
                            new_weight_stream.split("\n"))
    new_weight_stream = list(new_weight_stream)

    def write_weights(sim):
        addr = 0
        for nw in new_weight_stream:
            axilite_write(sim, addr, nw, basename="s_axilite_0_")
            addr += 4

    rtlsim_exec(model, exec_ctx, pre_hook=write_weights)
    y = exec_ctx["outp"][1]
    expected = multithreshold(in_tensor, new_weights)[1]
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        expected = 2 * expected - 1
    else:
        # signed offset
        expected += act.min()
    assert (y == expected).all()
Exemplo n.º 26
0
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(),
                          idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
    # threshold of first channel is zero, while using BIPOLAR output)
    if act == DataType["BIPOLAR"]:
        T[0][0] = 0
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval,
                                                  mem_mode)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    y = multithreshold(x, T)
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()

    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), "cppsim failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 27
0
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    kernel_size, stride, pad = conv_config
    np.random.seed(0)
    idt = DataType.UINT4

    in_feature_dim = 7
    in_chn = 16

    if depthwise is True:
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
    else:
        group = 1
        out_chn = 20
        conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]

    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)

    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]

    conv_weight_dt = DataType.UINT4

    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size, kernel_size]
    conv_config["pads"] = [pad, pad, pad, pad]
    conv_config["strides"] = [stride, stride]

    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    ]

    modelproto = helper.make_model(
        helper.make_graph(
            name="conv_test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
            ],
        )
    )

    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))

    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())

    new_model = model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        fc_inst = getCustomOp(fc_node)
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1)))
        simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1)))
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])

    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferShapes())
    new_model = new_model.transform(InferDataTypes())

    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)
    if kernel_size == 1 and stride > 1 and pad == 0:
        assert new_model.graph.node[1].op_type == "DownSampler"
        if exec_mode == "rtlsim":
            node = new_model.get_nodes_by_op_type("DownSampler")[0]
            inst = getCustomOp(node)
            cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
            exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
            exp_cycles = exp_cycles_dict[node.name]
            assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
            assert exp_cycles != 0

    if pad == 1:
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        padding_inst = getCustomOp(padding_node)
        assert padding_inst.get_nodeattr("SIMD") == in_chn

    if depthwise is True and exec_mode == "rtlsim":
        node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0
Exemplo n.º 28
0
def test_fpgadataflow_slidingwindow_1d(idt, k, ifm_dim, ifm_ch, stride,
                                       dilation, exec_mode, simd, dw, flip):
    if flip:
        k = k[::-1]
        ifm_dim = ifm_dim[::-1]
        stride = stride[::-1]
        dilation = dilation[::-1]

    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation

    if (dilation_h > 1 or dilation_w > 1) and (stride_h > 1 or stride_w > 1):
        pytest.skip("""Dilation value greater than 1 and stride greater than 1
            currently not supported for 1D convolutions""")
    if simd > ifm_ch:
        pytest.skip("SIMD cannot be larger than number of input channels")

    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0,
                                        dilation_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0,
                                        dilation_w)
    ofm_dim = [ofm_dim_h, ofm_dim_w]

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
        dw=dw,
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, k_h * k_w,
                                        ifm_ch // simd, simd)
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w,
                                        ifm_ch * k_h * k_w)
        assert (y_produced == y_expected).all()

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator1D")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 29
0
def test_fpgadataflow_vvau(
    idt, wdt, act, pe, dim_h, dim_w, k_h, k_w, channels, exec_mode
):
    if pe == "channels":
        pe = channels

    if dim_w == 1 and k_w != 1:
        pytest.skip("1D image requires 1D kernel, skipping.")

    if channels % pe != 0:
        pytest.skip("Requirement Channels divisable by PE is violated.")

    # Generate weights in expected shape for ONNX and HLS node
    W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w))  # shape: [channels, 1, k, k]
    W_onnx = _infer_sparse_weight_tensor(
        W, k_h, k_w, channels
    )  # shape: [k*k*channels, channels]

    # Generate inputs in expected format for ONNX and HLS node
    x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels))
    x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe)
    x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5)
    x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w)

    if act is None:
        T = None
        tdt = None
        odt = DataType["INT32"]
    else:
        odt = act
        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
        T = np.sort(T, axis=1)
        tdt = DataType["INT32"]

    model = _make_single_vvau_modelwrapper(
        W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt
    )

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_vvau")

    input_dict = prepare_inputs(x_vvau)

    # Calculate output
    y_expected = np.matmul(x, W_onnx)  # Y is in [N, H, W, C] format
    if T is not None:
        # Reshape Y, as multithreshold expects Y to be in [N, C, H, W] format
        y_expected = np.transpose(y_expected, (0, 3, 1, 2))
        y_expected = multithreshold(y_expected, T)
        y_expected = np.transpose(y_expected, (0, 2, 3, 1))
        # signed offset
        y_expected += act.min()

    y_produced = oxe.execute_onnx(model, input_dict, return_full_exec_context=False)[
        "outp"
    ]

    assert (y_produced == y_expected).all(), "cppsim failed"

    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
Exemplo n.º 30
0
def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
        mem_mode, idt, wdt, act, nf, sf, mw, mh):
    if nf == -1:
        nf = mh
    if sf == -1:
        sf = mw
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0
    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))
    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            odt = DataType.UINT32
        else:
            odt = DataType.INT32
    else:
        odt = act
        (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32)
        # provide non-decreasing thresholds
        T = np.sort(T, axis=1)
        # generate thresholds for activation
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32
    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T,
                                             tdt)
    for node in model.graph.node:
        # lookup op_type in registry of CustomOps
        inst = getCustomOp(node)
        inst.set_nodeattr("mem_mode", mem_mode)

    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)
    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # TODO split up into several dependent tests -- need to check how this
    # works for parametrized tests...
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced.reshape(
        y_expected.shape) == y_expected).all(), "rtlsim failed"

    hls_synt_res_est = model.analysis(hls_synth_res_estimation)
    assert "StreamingFCLayer_Batch_0" in hls_synt_res_est

    node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
    inst = getCustomOp(node)
    cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
    exp_cycles_dict = model.analysis(exp_cycles_per_layer)
    exp_cycles = exp_cycles_dict[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
    assert exp_cycles != 0