def test_end2end_tfc_w1a2_verify_dataflow_part():
    """Cross-check the TFC-w1a2 stitched-IP dataflow partition: run the same
    input through cppsim, node-by-node rtlsim and whole-network rtlsim and
    assert all three produce matching outputs.

    Consumes build_dir + "/end2end_tfc_w1a2_ipstitch.onnx" and saves one
    checkpoint per execution mode as a side effect.
    """
    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
    # all-zero dummy input: only cross-simulator consistency is checked here,
    # not classification accuracy
    x = np.zeros((1, 784), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
    # execute_onnx(..., True) returns the full execution context, not just
    # the graph outputs
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim: selected via model metadata rather
    # than a per-node exec mode
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    # cppsim is the reference; both rtlsim flavors must agree with it
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_fpgadataflow_packed_dsp(ich, och, idim, k, s, pad, wdt, idt, tdt, odt, mode):
    """Check the InferDoublePackedConv transformation.

    After the transform the graph must be exactly
    Transpose -> ConvDoublePacked_Batch -> Transpose, and executing the
    transformed model (cppsim or rtlsim, per ``mode``) must reproduce the
    reference model's output bit-exactly.
    """
    model = make_model(ich, och, idim, k, s, pad, wdt, idt, tdt, odt)
    cdp_model = model.transform(InferDoublePackedConv())
    assert (
        len(cdp_model.graph.node) == 3
        and cdp_model.graph.node[1].op_type == "ConvDoublePacked_Batch"
        and cdp_model.graph.node[0].op_type == "Transpose"
        and cdp_model.graph.node[-1].op_type == "Transpose"
    ), "Incorrect model"
    # execute models and compare
    x = gen_finn_dt_tensor(idt, (1, ich, idim, idim))
    input_dict = {"inp": x}
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]
    if mode == "cppsim":
        cdp_model = cdp_model.transform(SetExecMode("cppsim"))
        cdp_model = cdp_model.transform(PrepareCppSim())
        cdp_model = cdp_model.transform(CompileCppSim())
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["outp"]
    elif mode == "rtlsim":
        cdp_model = cdp_model.transform(SetExecMode("rtlsim"))
        cdp_model = cdp_model.transform(GiveUniqueNodeNames())
        cdp_model = cdp_model.transform(GiveReadableTensorNames())
        cdp_model = cdp_model.transform(PrepareIP("xc7z020clg400-1", 5))
        cdp_model = cdp_model.transform(HLSSynthIP())
        cdp_model = cdp_model.transform(PrepareRTLSim())
        # GiveReadableTensorNames renamed the graph I/O tensors
        input_dict = {"global_in": x}
        y_produced = oxe.execute_onnx(cdp_model, input_dict)["global_out"]
    # bugfix: the failure message used to hard-code "cppsim failed" even when
    # mode == "rtlsim"; report the actual execution mode instead
    assert (y_produced.flatten() == y_expected.flatten()).all(), mode + " failed"
def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
    """Compare the StreamingMaxPool HLS op against a MaxPoolNHWC golden model
    for both 1D (dim_1d) and square 2D pooling, in cppsim or rtlsim; in rtlsim
    mode also sanity-check the analytical cycle estimate.
    """
    ifm_dim_h = ifm_dim
    k_h = k
    if dim_1d:
        # 1D pooling: collapse the width dimension to 1
        ifm_dim_w = 1
        k_w = 1
    else:
        ifm_dim_w = ifm_dim_h
        k_w = k_h
    # from here on ifm_dim and k are (H, W) tuples, shadowing the scalar args
    ifm_dim = (ifm_dim_h, ifm_dim_w)
    k = (k_h, k_w)
    # stride is fixed to the kernel size (non-overlapping pooling windows)
    stride_h = k_h
    stride_w = k_w
    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
    ofm_dim = (ofm_dim_h, ofm_dim_w)
    if idt == DataType["BIPOLAR"] and dim_1d:
        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)
    # golden reference: plain MaxPoolNHWC model executed via onnx execution
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception(
            "Unknown exec_mode in test_layer_streaming_maxpool_batch")
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
    if exec_mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode):
    """Exercise the LabelSelect HLS op: build a model with the requested
    folding (PE) and k, run it in cppsim or rtlsim, and verify the produced
    indices form a valid top-k of the input."""
    np.random.seed(0)
    # fold == -1 means no folding (PE = 1); otherwise PE = labels / fold
    pe = 1 if fold == -1 else labels // fold
    assert labels % pe == 0
    # k == -1 selects all labels
    if k == -1:
        k = labels
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, labels))
    model = make_labelselect_modelwrapper(labels, pe, k, idt)
    if exec_mode == "cppsim":
        for trafo in (PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")):
            model = model.transform(trafo)
    elif exec_mode == "rtlsim":
        for trafo in (
            SetExecMode("rtlsim"),
            GiveUniqueNodeNames(),
            PrepareIP("xc7z020clg400-1", 5),
            HLSSynthIP(),
            PrepareRTLSim(),
        ):
            model = model.transform(trafo)
    else:
        raise Exception("Unknown exec_mode")
    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    # soft check: y must be *a* valid top-k (ties make exact compare fragile)
    assert soft_verify_topk(x, y, k), exec_mode + " failed"
def test_end2end_mobilenet_cppsim():
    """Compile the folded MobileNet model for cppsim, execute it on the saved
    test input, record compile time and results to build_dir, and compare the
    top-5 indices and scaled probabilities against golden reference files."""
    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx")
    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    start = time.time()
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    end = time.time()
    elapsed_time = end - start
    # fix: use a context manager so the file handle is closed even if the
    # write raises (was a bare open/write/close sequence)
    with open(build_dir + "/end2end_mobilenet_compile_time.txt", "w+") as f:
        f.write("Execution time in seconds: " + str(elapsed_time))
    model.save(build_dir + "/end2end_mobilenet_cppsim.onnx")
    # execute_onnx(..., True) returns the full execution context so that
    # intermediate tensors can be read below
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    np.save(build_dir + "/end2end_mobilenet_result_cppsim.npy", res_cppsim)
    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
    # output of the second-to-last node scaled by a0 — presumably the
    # pre-TopK scores; verify against the graph if this ever changes
    res_cppsim_prob = ret_cppsim[model.graph.node[-2].output[0]] * a0
    np.save(build_dir + "/end2end_mobilenet_result_cppsim_prob.npy", res_cppsim_prob)
    # check result with golden values
    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")
    assert (golden == res_cppsim).all()
    assert np.isclose(golden_prob, res_cppsim_prob).all()
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Compare the StreamingMaxPool HLS op against a MaxPoolNHWC golden model
    for square 2D pooling, executed via cppsim or rtlsim."""
    # stride is fixed to the kernel size (non-overlapping pooling windows)
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)
    # golden reference: plain MaxPoolNHWC model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # bugfix: the message used to name the wrong test
        # ("test_fpgadataflow_slidingwindow"), a copy-paste leftover
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
    """Verify that lowering a depthwise convolution to
    ConvolutionInputGenerator + Vector_Vector_Activate_Batch preserves the
    reference model's output under cppsim, for the given folding (pe)."""
    idt = wdt = DataType.INT4
    ifm_dim = 6
    ifm_ch = 4
    # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
    model = set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding)
    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_dim, ifm_dim, ifm_ch])
    input_dict = {"inp": input_tensor}
    new_model = model.transform(InferConvInpGen())
    new_model = new_model.transform(InferVVAU())
    # set SIMD in ConvInputGen node and PE in VVAU node
    # (same parallelism value used for both, so the streams line up)
    for n in new_model.graph.node:
        if n.op_type == "ConvolutionInputGenerator":
            convinputgen_node = getCustomOp(n)
            convinputgen_node.set_nodeattr("SIMD", pe)
        elif n.op_type == "Vector_Vector_Activate_Batch":
            vvau_node = getCustomOp(n)
            vvau_node.set_nodeattr("PE", pe)
    new_model = new_model.transform(SetExecMode("cppsim"))
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    # compare_execution runs both models on input_dict and checks equality
    assert oxe.compare_execution(model, new_model, input_dict)
def test_end2end_cnv_w1a1_verify_dataflow_part():
    """Cross-check the CNV-w1a1 stitched-IP dataflow partition: run the same
    input through cppsim, node-by-node rtlsim and whole-network rtlsim and
    assert all three produce matching outputs.

    Consumes build_dir + "/end2end_cnv_w1a1_ipstitch.onnx", saves one
    checkpoint per execution mode, and sets the LIVENESS_THRESHOLD
    environment variable as side effects.
    """
    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
    # all-zero dummy input (NHWC 32x32x3): only cross-simulator consistency
    # is checked here, not classification accuracy
    x = np.zeros((1, 32, 32, 3), dtype=np.float32)
    inp_name = model.graph.input[0].name
    out_name = model.graph.output[0].name
    inp_dict = {inp_name: x}
    # cppsim
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
    # execute_onnx(..., True) returns the full execution context
    ret_cppsim = execute_onnx(model, inp_dict, True)
    res_cppsim = ret_cppsim[out_name]
    # node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareRTLSim())
    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx")
    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
    # whole-network (ip-stitched) rtlsim
    model.set_metadata_prop("exec_mode", "rtlsim")
    model.save(build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx")
    # this is a particularly long-running test, set liveness thr. to unlimited
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
    res_rtlsim_whole = ret_rtlsim_whole[out_name]
    # cppsim is the reference; both rtlsim flavors must agree with it
    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_mode):
    """Exercise the ChannelwiseOp_Batch HLS op for add/mul against a NumPy
    reference, in cppsim or rtlsim; in rtlsim mode also check HLS resource
    estimation and the analytical cycle estimate."""
    # nf == -1 means fully folded (PE = 1); PE = ich / nf otherwise
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0
    # generate input and param data
    x = gen_finn_dt_tensor(idt, tuple(vecs + [ich]))
    # C = np.random.randint(idt.min(), idt.max() + 1, ich).astype(np.float32)
    C = gen_finn_dt_tensor(pdt, (ich))
    odt = act
    model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs)
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # package input data as dictionary
    input_dict = {"inp": x}
    oshape = model.get_tensor_shape("outp")
    # reference: broadcast per-channel params C over the input shape
    C_reshaped = np.broadcast_to(C.flatten(), x.shape)
    if func == "add":
        y = x + C_reshaped
    elif func == "mul":
        y = x * C_reshaped
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)
    # bugfix: failure message used to hard-code "cppsim failed" for both
    # execution modes; report the actual mode instead
    assert (y_produced == y_expected).all(), exec_mode + " failed"
    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "ChannelwiseOp_Batch_0" in hls_synt_res_est
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    """Exercise the FMPadding_Batch HLS op against np.pad as reference, in
    cppsim or rtlsim; in rtlsim mode also sanity-check the cycle estimate."""
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
    input_dict = {"inp": x}
    # total padding per spatial dim; split into up/down and left/right below
    odim = idim + pad
    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim, odim, num_ch)
    assert y_produced.shape == expected_oshape
    # calculate reference
    # calculate correct pad according to parameters
    # pad_style == 2: odd padding puts the extra row/column on top/left;
    # other styles put it on bottom/right
    if pad_style == 2:
        if pad % 2 == 0:
            pad_up = pad // 2
            pad_left = pad // 2
        else:
            pad_up = pad // 2 + 1
            pad_left = pad // 2 + 1
    else:
        pad_up = pad // 2
        pad_left = pad // 2
    pad_down = pad - pad_up
    pad_right = pad - pad_left
    # NHWC layout: pad only the two spatial axes
    y_expected = np.pad(x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)),
                        "constant")
    assert (y_produced == y_expected).all()
    if mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_convert_to_hls_channelwise_layer(pdt, idt, onnx_op_name, scalar_param, exec_mode):
    """Verify InferChannelwiseLinearLayer: converting a channelwise ONNX op
    (with scalar or per-channel parameter) into ChannelwiseOp_Batch must
    preserve the original model's output under cppsim or rtlsim."""
    ifm_ch = 16
    ifm_dim = 5
    ishape = (1, ifm_ch, ifm_dim, ifm_dim)
    if scalar_param:
        pshape = (1, )
    else:
        pshape = (1, ifm_ch, 1, 1)
    np.random.seed(0)
    model = make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape)
    # Since there aren't data types with a bit width that is not a power of 2,
    # there are cases where the input won't use its full range: generate
    # 32-bit inputs from the corresponding 16-bit range instead.
    if idt == DataType["INT32"]:
        x = gen_finn_dt_tensor(DataType["INT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    elif idt == DataType["UINT32"]:
        x = gen_finn_dt_tensor(DataType["UINT16"], (1, ifm_ch, ifm_dim, ifm_dim))
    else:
        x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
    input_dict = prepare_inputs(x)
    # golden output from the unconverted model
    y_expected = oxe.execute_onnx(model, input_dict)["outp"]
    new_model = model.transform(to_hls.InferChannelwiseLinearLayer())
    new_model = new_model.transform(GiveUniqueNodeNames())
    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    ctx_produced = oxe.execute_onnx(new_model, input_dict,
                                    return_full_exec_context=True)
    y_produced = ctx_produced["outp"]
    assert (y_produced == y_expected).all()
    # node[0] is expected to be the data-layout conversion; node[1] the HLS op
    assert new_model.graph.node[1].op_type == "ChannelwiseOp_Batch"
def test_compilation_trafo():
    """Check that PrepareCppSim + CompileCppSim populate the
    executable_path attribute of a StreamingFCLayer_Batch node with the
    path of an existing compiled binary."""
    idt = wdt = odt = DataType.BIPOLAR
    mw = 8
    mh = 8
    pe = 4
    simd = 4
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])
    node_inp_list = ["inp", "weights", "thresh"]
    # minimal single-node graph; code_gen_dir/executable_path start empty
    # and must be filled in by the compile transformations
    FCLayer_node = helper.make_node(
        "StreamingFCLayer_Batch",
        node_inp_list,
        ["outp"],
        domain="finn",
        backend="fpgadataflow",
        code_gen_dir="",
        executable_path="",
        resType="ap_resource_lut()",
        MW=mw,
        MH=mh,
        SIMD=simd,
        PE=pe,
        inputDataType=idt.name,
        weightDataType=wdt.name,
        outputDataType=odt.name,
        noActivation=1,
    )
    graph = helper.make_graph(nodes=[FCLayer_node], name="fclayer_graph",
                              inputs=[inp], outputs=[outp])
    model = helper.make_model(graph, producer_name="fclayer-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("weights", wdt)
    W = util.gen_finn_dt_tensor(wdt, (mw, mh))
    model.set_initializer("weights", W)
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    for node in model.graph.node:
        # attribute value is stored as bytes in the ONNX protobuf
        compilation_attribute = util.get_by_name(node.attribute, "executable_path")
        executable = compilation_attribute.s.decode("UTF-8")
        print(executable)
        assert os.path.isfile(executable), """Executable of node with
        op type {} does not exist!""".format(node.op_type)
def test_fpgadataflow_slidingwindow(
    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
):
    """Exercise the ConvolutionInputGenerator HLS op against an Im2Col golden
    model, in cppsim or rtlsim, for both regular (dw=0) and depthwise (dw=1)
    output ordering; in rtlsim mode also check the cycle estimate."""
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
    )
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")
    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        # depthwise mode interleaves channels and kernel positions: permute
        # the Im2Col reference from (k*k, ch) ordering to (ch, k*k) ordering
        y_expected = y_expected.reshape(
            1, ofm_dim, ofm_dim, k * k, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim, ofm_dim, ifm_ch * k * k)
        assert (y_produced == y_expected).all()
    if exec_mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_cppsim(self, topology, wbits, abits):
    """Compile the folded checkpoint for cppsim, save the cppsim checkpoint,
    and check its top-1 output through the dataflow parent against the
    golden I/O pair."""
    model = load_test_checkpoint_or_skip(
        get_checkpoint_name(topology, wbits, abits, "fold")
    )
    for trafo in (PrepareCppSim(), CompileCppSim(), SetExecMode("cppsim")):
        model = model.transform(trafo)
    cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
    model.save(cppsim_chkpt)
    parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
    golden_in, golden_out = get_golden_io_pair(
        topology, wbits, abits, return_topk=1
    )
    # run the child (cppsim) model embedded in its dataflow parent
    y = execute_parent(parent_chkpt, cppsim_chkpt, golden_in)
    assert np.isclose(y, golden_out).all()
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig) -> ModelWrapper:
    """Apply the folding configuration file onto the model to set folding
    (parallelization) and other attributes, if config file is specified.

    If the FOLDED_HLS_CPPSIM verification step is enabled in cfg, the model
    is additionally compiled for cppsim and verified against the parent
    graph before being returned.
    """
    if cfg.folding_config_file is not None:
        # ApplyConfig matches nodes by name, so names must be unique first
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(ApplyConfig(cfg.folding_config_file))
    if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
        # prepare cppsim
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
        verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
    return model
def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
    """Exercise the AddStreams_Batch HLS op (elementwise add of two streams)
    against a NumPy reference, in cppsim or rtlsim; in rtlsim mode also
    sanity-check the analytical cycle estimate."""
    if fold == -1:
        pe = 1
    else:
        # NOTE(review): unlike sibling tests this clamps PE to >= 1 via max();
        # the assert below still requires ch to be divisible by the result
        pe = max(1, ch // fold)
    assert ch % pe == 0
    # generate input data
    x1 = gen_finn_dt_tensor(idt, (1, ch))
    x2 = gen_finn_dt_tensor(idt, (1, ch))
    model = make_addstreams_modelwrapper(ch, pe, idt)
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # prepare input data
    input_dict = prepare_inputs(x1, x2)
    oshape = model.get_tensor_shape("outp")
    # reference: plain elementwise sum
    y = x1 + x2
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)
    assert (y_produced == y_expected).all(), exec_mode + " failed"
    if exec_mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
    """Exercise the GlobalAccPool_Batch HLS op (sum over spatial dims)
    against a NumPy reference, in cppsim or rtlsim."""
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))
    model = make_accpool_modelwrapper(ch, pe, imdim, idt)
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    y = oxe.execute_onnx(model, input_dict)["outp"]
    # reference: sum over the two spatial axes (NHWC input)
    expected_y = np.sum(x, axis=(1, 2)).flatten()
    assert (y == expected_y).all(), exec_mode + " failed"
    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        # commented out, needs performance debug:
        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
        # assert False where False =
        # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
        assert exp_cycles != 0
        assert cycles_rtlsim != 0
def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
    """Exercise the DuplicateStreams_Batch HLS op: both output streams must
    equal the input, in cppsim or rtlsim; in rtlsim mode also sanity-check
    the analytical cycle estimate."""
    if fold == -1:
        pe = 1
    else:
        pe = ch // fold
    assert ch % pe == 0
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, imdim, imdim, ch))
    model = make_dupstreams_modelwrapper(ch, pe, imdim, idt)
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # prepare input data and execute
    input_dict = prepare_inputs(x, idt)
    output_dict = oxe.execute_onnx(model, input_dict)
    y0 = output_dict["outp0"]
    y1 = output_dict["outp1"]
    # duplication: both outputs must be identical copies of the input
    expected_y = x
    assert (y0 == expected_y).all(), exec_mode + " failed"
    assert (y1 == expected_y).all(), exec_mode + " failed"
    if exec_mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
    """Compare the StreamingMaxPool HLS op against a MaxPoolNHWC golden model
    for square 2D pooling, in cppsim or rtlsim; in rtlsim mode also
    sanity-check the analytical cycle estimate."""
    # stride is fixed to the kernel size (non-overlapping pooling windows)
    stride = k
    ofm_dim = int(((ifm_dim - k) / stride) + 1)
    if ifm_dim % k != 0:
        pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
    # prepare input data
    input_dict = prepare_inputs(x)
    # golden reference: plain MaxPoolNHWC model
    golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # bugfix: the message used to name the wrong test
        # ("test_fpgadataflow_slidingwindow"), a copy-paste leftover
        raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool")
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced == y_expected).all()
    if exec_mode == "rtlsim":
        # compare measured rtlsim cycles against the analytical estimate
        node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
        assert exp_cycles != 0
def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
    """Build a Gather-based embedding-lookup model, verify its metadata and
    functional behavior, convert it to the Lookup HLS op via
    InferLookupLayer, and check the converted model in cppsim or rtlsim."""
    ishape = (1, 10)
    num_embeddings, idt, embedding_dim = embedding_cfg
    eshape = (num_embeddings, embedding_dim)
    exp_oshape = tuple(list(ishape) + [embedding_dim])
    embeddings = gen_finn_dt_tensor(edt, eshape)
    model = make_lookup_model(embeddings, ishape, idt, edt)
    # sanity-check the generated model before conversion
    assert len(model.graph.node) == 1
    assert model.graph.node[0].op_type == "Gather"
    iname = model.graph.input[0].name
    # first node input holds the embedding table initializer
    ename = model.graph.node[0].input[0]
    oname = model.graph.output[0].name
    assert model.get_tensor_datatype(iname) == idt
    assert model.get_tensor_datatype(ename) == edt
    assert model.get_tensor_datatype(oname) == edt
    assert tuple(model.get_tensor_shape(ename)) == eshape
    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
    assert (model.get_initializer(ename) == embeddings).all()
    # clip indices into the valid embedding range
    itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
    itensor = np.clip(itensor, 0, num_embeddings - 1)
    ret = execute_onnx(model, {iname: itensor})
    # reference lookup via np.take on the embedding table
    exp_out = np.take(embeddings, itensor, axis=0)
    assert (exp_out == ret[oname]).all()
    # call transformation to convert to HLS and verify conversion
    model = model.transform(InferLookupLayer())
    assert model.graph.node[0].op_type == "Lookup"
    assert model.graph.node[0].input[0] == iname
    assert model.graph.node[0].input[1] == ename
    assert model.graph.node[0].output[0] == oname
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
        model = model.transform(HLSSynthIP())
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(PrepareRTLSim())
    ret_sim = execute_onnx(model, {iname: itensor})
    assert (exp_out == ret_sim[oname]).all()
def test_convert_to_hls_layers_tfc_w1a1():
    """End-to-end check of InferBinaryStreamingFCLayer on the TFC-w1a1 net:
    export from Brevitas, streamline, convert the four matmul layers to
    StreamingFCLayer_Batch, set folding, run cppsim and compare against the
    Brevitas forward pass on an MNIST sample."""
    tfc = get_test_model_trained("TFC", 1, 1)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    # standard tidy-up + streamlining pipeline before HLS conversion
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
    model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
    model = model.transform(RoundAndClipThresholds())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    # after conversion, nodes 2..5 must be the four FC layers with the
    # expected weight/threshold shapes
    fc0 = model.graph.node[2]
    assert fc0.op_type == "StreamingFCLayer_Batch"
    assert model.get_tensor_shape(fc0.input[0]) == [1, 784]
    assert model.get_tensor_shape(fc0.input[1]) == [784, 64]
    assert model.get_tensor_shape(fc0.input[2]) == [64, 1]
    fc1 = model.graph.node[3]
    assert fc1.op_type == "StreamingFCLayer_Batch"
    assert model.get_tensor_shape(fc1.input[0]) == [1, 64]
    assert model.get_tensor_shape(fc1.input[1]) == [64, 64]
    assert model.get_tensor_shape(fc1.input[2]) == [64, 1]
    fc2 = model.graph.node[4]
    assert fc2.op_type == "StreamingFCLayer_Batch"
    assert model.get_tensor_shape(fc2.input[0]) == [1, 64]
    assert model.get_tensor_shape(fc2.input[1]) == [64, 64]
    assert model.get_tensor_shape(fc2.input[2]) == [64, 1]
    fc3 = model.graph.node[5]
    assert fc3.op_type == "StreamingFCLayer_Batch"
    assert model.get_tensor_shape(fc3.input[0]) == [1, 64]
    assert model.get_tensor_shape(fc3.input[1]) == [64, 10]
    # set per-layer folding (SIMD/PE) before compiling for cppsim
    fc0w = getCustomOp(fc0)
    fc0w.set_nodeattr("SIMD", 784)
    fc0w.set_nodeattr("PE", 16)
    fc1w = getCustomOp(fc1)
    fc1w.set_nodeattr("SIMD", 16)
    fc1w.set_nodeattr("PE", 16)
    fc2w = getCustomOp(fc2)
    fc2w.set_nodeattr("SIMD", 16)
    fc2w.set_nodeattr("PE", 16)
    fc3w = getCustomOp(fc3)
    fc3w.set_nodeattr("SIMD", 16)
    fc3w.set_nodeattr("PE", 10)
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"global_in": nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict)
    produced = output_dict[list(output_dict.keys())[0]]
    # run using PyTorch/Brevitas
    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
    assert input_tensor.shape == (1, 1, 28, 28)
    # do forward pass in PyTorch/Brevitas
    expected = tfc.forward(input_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
    """Build a single Conv ONNX model, lower it to matmul-based HLS layers
    (VVAU for depthwise, StreamingFCLayer otherwise) and compare execution
    of the original vs. the converted model in cppsim or rtlsim.

    conv_config is a (kernel_size, stride, pad) tuple; depthwise selects
    grouped conv; exec_mode is "cppsim" or "rtlsim".
    """
    kernel_size, stride, pad = conv_config
    np.random.seed(0)  # deterministic weights/inputs for reproducibility
    idt = DataType.UINT4
    in_feature_dim = 7
    in_chn = 16
    if depthwise is True:
        # depthwise conv: one filter per channel, group == channels
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
    else:
        group = 1
        out_chn = 20
        conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]
    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)
    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
    conv_weight_dt = DataType.UINT4
    # reuse the name for the ONNX Conv node attributes
    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size, kernel_size]
    conv_config["pads"] = [pad, pad, pad, pad]
    conv_config["strides"] = [stride, stride]
    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
    ]
    modelproto = helper.make_model(
        helper.make_graph(
            name="conv_test",
            inputs=[top_in],
            outputs=[top_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
            ],
        )
    )
    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("top_in", idt)
    model.set_tensor_datatype("top_out", idt)
    model.set_tensor_datatype("p1", conv_weight_dt)
    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    # lower Conv to Im2Col + MatMul, then infer HLS custom ops
    new_model = model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    else:
        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
        fc_inst = getCustomOp(fc_node)
        mw = fc_inst.get_nodeattr("MW")
        mh = fc_inst.get_nodeattr("MH")
        # pick the smallest folding factors > 1 that divide MH/MW
        pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1)))
        simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1)))
        fc_inst.set_nodeattr("PE", pe_cands[0])
        fc_inst.set_nodeattr("SIMD", simd_cands[0])
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferShapes())
    new_model = new_model.transform(InferDataTypes())
    if exec_mode == "cppsim":
        new_model = new_model.transform(PrepareCppSim())
        new_model = new_model.transform(CompileCppSim())
        new_model = new_model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        new_model = new_model.transform(SetExecMode("rtlsim"))
        new_model = new_model.transform(GiveUniqueNodeNames())
        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
        new_model = new_model.transform(HLSSynthIP())
        new_model = new_model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # original vs. converted model must agree on random input
    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)
    if kernel_size == 1 and stride > 1 and pad == 0:
        # a 1x1 strided conv should have been converted to a DownSampler
        assert new_model.graph.node[1].op_type == "DownSampler"
        if exec_mode == "rtlsim":
            node = new_model.get_nodes_by_op_type("DownSampler")[0]
            inst = getCustomOp(node)
            cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
            exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
            exp_cycles = exp_cycles_dict[node.name]
            # estimate vs. measured cycles within tolerance
            assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
            assert exp_cycles != 0
    if pad == 1:
        # padding layer should be fully parallel over input channels
        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
        padding_inst = getCustomOp(padding_node)
        assert padding_inst.get_nodeattr("SIMD") == in_chn
    if depthwise is True and exec_mode == "rtlsim":
        node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
        assert exp_cycles != 0
def test_fpgadataflow_vvau(
    idt, wdt, act, pe, dim_h, dim_w, k_h, k_w, channels, exec_mode
):
    """Test the Vector_Vector_Activate_Batch (VVAU) HLS node against a
    numpy reference (sparse matmul + optional multithreshold activation).

    act is None for no activation (raw INT32 accumulators) or an output
    DataType; exec_mode is "cppsim" or "rtlsim".
    """
    if pe == "channels":
        pe = channels
    if dim_w == 1 and k_w != 1:
        pytest.skip("1D image requires 1D kernel, skipping.")
    if channels % pe != 0:
        pytest.skip("Requirement Channels divisable by PE is violated.")
    # Generate weights in expected shape for ONNX and HLS node
    W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w))  # shape: [channels, 1, k, k]
    W_onnx = _infer_sparse_weight_tensor(
        W, k_h, k_w, channels
    )  # shape: [k*k*channels, channels]
    # Generate inputs in expected format for ONNX and HLS node
    x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels))
    # interleave input for the folded (PE-wise) layout the HLS node expects
    x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe)
    x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5)
    x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w)
    if act is None:
        T = None
        tdt = None
        odt = DataType["INT32"]
    else:
        odt = act
        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
        # multithreshold requires non-decreasing thresholds per channel
        T = np.sort(T, axis=1)
        tdt = DataType["INT32"]
    model = _make_single_vvau_modelwrapper(
        W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt
    )
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_vvau")
    input_dict = prepare_inputs(x_vvau)
    # Calculate output
    y_expected = np.matmul(x, W_onnx)  # Y is in [N, H, W, C] format
    if T is not None:
        # Reshape Y, as multithreshold expects Y to be in [N, C, H, W] format
        y_expected = np.transpose(y_expected, (0, 3, 1, 2))
        y_expected = multithreshold(y_expected, T)
        y_expected = np.transpose(y_expected, (0, 2, 3, 1))
        # signed offset
        y_expected += act.min()
    y_produced = oxe.execute_onnx(model, input_dict, return_full_exec_context=False)[
        "outp"
    ]
    # fix: failure message now names the actual exec_mode instead of
    # unconditionally claiming "cppsim failed" in rtlsim runs too
    assert (y_produced == y_expected).all(), "%s failed" % exec_mode
    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_slidingwindow_1d(idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw, flip):
    """Test the ConvolutionInputGenerator1D HLS node against the Im2Col
    golden reference, in cppsim or rtlsim.

    flip swaps the H/W roles of all spatial parameters; dw enables the
    depthwise (channel-interleaved) output variant.
    """
    if flip:
        # swap H and W for all spatial parameters
        k = k[::-1]
        ifm_dim = ifm_dim[::-1]
        stride = stride[::-1]
        dilation = dilation[::-1]
    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation
    if (dilation_h > 1 or dilation_w > 1) and (stride_h > 1 or stride_w > 1):
        pytest.skip("""Dilation value greater than 1 and stride greater than 1
                currently not supported for 1D convolutions""")
    if simd > ifm_ch:
        pytest.skip("SIMD cannot be larger than number of input channels")
    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
    ofm_dim = [ofm_dim_h, ofm_dim_w]
    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
        dw=dw,
    )
    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        # fix: message previously named "test_fpgadataflow_slidingwindow",
        # which is a different test than this 1D variant
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow_1d")
    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        stride=stride,
        dilation=dilation,
        idt=idt,
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        # depthwise variant interleaves channels SIMD-wise in the output
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd)
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w)
        assert (y_produced == y_expected).all()
    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator1D")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): if nf == -1: nf = mh if sf == -1: sf = mw pe = mh // nf simd = mw // sf assert mh % pe == 0 assert mw % sf == 0 # generate weights W = gen_finn_dt_tensor(wdt, (mw, mh)) # generate input data x = gen_finn_dt_tensor(idt, (1, mw)) if act is None: # no activation, produce accumulators T = None tdt = None if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: odt = DataType.UINT32 else: odt = DataType.INT32 else: odt = act (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw) n_steps = act.get_num_possible_values() - 1 T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32) # provide non-decreasing thresholds T = np.sort(T, axis=1) # generate thresholds for activation if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: tdt = DataType.UINT32 # bias thresholds to be positive T = np.ceil((T + mw) / 2) assert (T >= 0).all() else: tdt = DataType.INT32 model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt) for node in model.graph.node: # lookup op_type in registry of CustomOps inst = getCustomOp(node) inst.set_nodeattr("mem_mode", mem_mode) model = model.transform(SetExecMode("cppsim")) model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) # prepare input data input_dict = prepare_inputs(x, idt, wdt) if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: # convert inputs to binary and use xnorpopcountmatmul y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2) else: y = np.matmul(x, W) if T is not None: y = multithreshold(y, T) if act == DataType.BIPOLAR: # binary to bipolar y = 2 * y - 1 else: # signed offset y += act.min() oshape = model.get_tensor_shape("outp") y_expected = y.reshape(oshape) # execute model y_produced = oxe.execute_onnx(model, input_dict)["outp"] y_produced = y_produced.reshape(y_expected.shape) assert (y_produced == y_expected).all(), "cppsim failed"
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
    """Build a Conv -> MultiThreshold -> Flatten/Reshape -> MatMul ->
    MultiThreshold model, streamline and convert it to HLS layers, and
    check that the CNV-to-FC transition (transpose + flatten) is removed
    and cppsim still matches the original model.

    conv_config is (input_shape, kernel_shape, stride, pad), each a 2-tuple
    of H/W values; use_reshape selects Reshape vs Flatten for the
    conv-to-fc transition.
    """
    np.random.seed(0)  # deterministic weights/thresholds
    idt = DataType["UINT4"]
    odt = DataType["UINT4"]
    conv_weight_dt = DataType["INT4"]
    fc_weight_dt = DataType["INT4"]
    input_shape, kernel_shape, stride, pad = conv_config
    kernel_size_h, kernel_size_w = kernel_shape
    input_size_h, input_size_w = input_shape
    stride_h, stride_w = stride
    pad_h, pad_w = pad
    in_chn = 4
    fc_filters = 16
    if depthwise is True:
        # depthwise conv: one filter per channel, group == channels
        group = out_chn = in_chn
        conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w]
    else:
        group = 1
        out_chn = 8
        conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w]
    # total padding per spatial dim is 2*pad (applied on both sides)
    output_size_h = compute_conv_output_dim(input_size_h, kernel_size_h, stride_h, 2 * pad_h)
    output_size_w = compute_conv_output_dim(input_size_w, kernel_size_w, stride_w, 2 * pad_w)
    input_shape = [1, in_chn, input_size_h, input_size_w]
    fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters]
    output_shape = [1, fc_filters]
    conv_config = {}
    conv_config["dilations"] = [1, 1]
    conv_config["group"] = group
    conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w]
    conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w]
    conv_config["strides"] = [stride_h, stride_w]
    global_in = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, input_shape)
    global_out = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, output_shape)
    value_info = [
        helper.make_tensor_value_info("conv_param", TensorProto.FLOAT, conv_param_shape),
        helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)),
        helper.make_tensor_value_info("matmul_param", TensorProto.FLOAT, fc_param_shape),
        helper.make_tensor_value_info("thres2_param", TensorProto.FLOAT, (fc_filters, 15)),
        # NOTE(review): declared with shape [] but the initializer below is a
        # 2-element vector -- presumably tolerated by shape inference; confirm
        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
    ]
    # conv-to-fc transition: either an explicit Reshape (with shape input)
    # or a Flatten node, depending on the test parameter
    if use_reshape:
        flatten_node = helper.make_node("Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"])
    else:
        flatten_node = helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1)
    modelproto = helper.make_model(
        helper.make_graph(
            name="test",
            inputs=[global_in],
            outputs=[global_out],
            value_info=value_info,
            nodes=[
                helper.make_node("Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config),
                helper.make_node(
                    "MultiThreshold",
                    ["conv_out", "thres1_param"],
                    ["thres1_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
                flatten_node,
                helper.make_node("MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]),
                helper.make_node(
                    "MultiThreshold",
                    ["matmul_out", "thres2_param"],
                    ["global_out"],
                    domain="finn.custom_op.general",
                    out_dtype="UINT4",
                ),
            ],
        ))
    model = ModelWrapper(modelproto)
    model.set_tensor_datatype("global_in", idt)
    model.set_tensor_layout("global_in", DataLayout.NCHW)
    model.set_tensor_datatype("global_out", odt)
    model.set_tensor_datatype("conv_param", conv_weight_dt)
    model.set_tensor_datatype("matmul_param", fc_weight_dt)
    model.set_tensor_datatype("thres1_param", DataType["INT32"])
    model.set_tensor_datatype("thres2_param", DataType["INT32"])
    model.set_initializer("conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))
    model.set_initializer("thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0))
    model.set_initializer(
        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0))
    model.set_initializer("matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape))
    model.set_initializer("reshape_shape", np.array([1, -1]))
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferDataLayouts())
    # streamlining
    new_model = model.transform(MoveScalarLinearPastInvariants())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(LowerConvsToMatMul())
    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    new_model = new_model.transform(Streamline())
    new_model = new_model.transform(InferDataLayouts())
    new_model = new_model.transform(RemoveUnusedTensors())
    # convert_to_hls
    if depthwise is True:
        new_model = new_model.transform(to_hls.InferVVAU())
    new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
    new_model = new_model.transform(to_hls.InferThresholdingLayer())
    new_model = new_model.transform(to_hls.InferConvInpGen())
    new_model = new_model.transform(to_hls.InferStreamingMaxPool())
    new_model = new_model.transform(RemoveCNVtoFCFlatten())
    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
    new_model = new_model.transform(GiveUniqueNodeNames())
    new_model = new_model.transform(InferDataLayouts())
    # prepare cppsim
    new_model = new_model.transform(PrepareCppSim())
    new_model = new_model.transform(CompileCppSim())
    new_model = new_model.transform(SetExecMode("cppsim"))
    # check for correct execution
    x = gen_finn_dt_tensor(idt, input_shape)
    inp_dict = {model.graph.input[0].name: x}
    assert oxe.compare_execution(model, new_model, inp_dict)
    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))
    # check if transpose->flatten was removed
    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0
def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
    """Export the trained CNV-w1a1 net, streamline it, convert to HLS
    layers (optionally with standalone Thresholding_Batch activations),
    check the resulting node counts, and verify cppsim output against
    the streamlined model on a CIFAR-10 test image.

    fused_activation=False converts MultiThresholds to standalone HLS
    thresholding layers first, producing passthrough MVAUs afterwards.
    """
    cnv = get_test_model_trained("CNV", 1, 1)
    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
    model = ModelWrapper(export_onnx_path_cnv)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(Streamline())
    model = model.transform(LowerConvsToMatMul())
    model = model.transform(MakeMaxPoolNHWC())
    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
    model = model.transform(Streamline())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # load one of the test vectors
    fn = pk.resource_filename("finn.qnn-data", "cifar10/cifar10-test-data-class3.npz")
    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
    input_tensor = input_tensor / 255
    assert input_tensor.shape == (1, 3, 32, 32)
    # generate expected value from streamlined net
    input_dict = {"global_in": input_tensor}
    expected_ctx = oxe.execute_onnx(model, input_dict, True)
    expected = expected_ctx[model.graph.output[0].name]
    # if we infer thresholding first, all MultiThresholds get converted to HLS
    # subsequently, the FC inference will generate passthrough MVAUs
    if not fused_activation:
        model = model.transform(to_hls.InferThresholdingLayer())
    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
    for node in model.graph.node:
        if node.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(node)
            inst.set_nodeattr("mem_mode", "decoupled")
            mw = inst.get_nodeattr("MW")
            mh = inst.get_nodeattr("MH")
            # fold PE by 4 and SIMD by 16 where the dims allow it
            if mh % 4 == 0:
                pe = mh // 4
            else:
                pe = mh
            inst.set_nodeattr("PE", pe)
            if mw % 16 == 0:
                simd = mw // 16
            else:
                simd = mw
            inst.set_nodeattr("SIMD", simd)
    model = model.transform(to_hls.InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    # check topology status
    finn_nodes = model.get_finn_nodes()
    if fused_activation:
        assert len(finn_nodes) == 18
    else:
        # standalone activations add 8 Thresholding_Batch nodes
        assert len(finn_nodes) == 26
        thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
        assert len(thr_nodes) == 8
    non_finn_nodes = model.get_non_finn_nodes()
    assert len(non_finn_nodes) == 4
    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
    assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
    fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    assert len(fc_nodes) == 9
    swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator")
    assert len(swg_nodes) == 6
    mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
    assert len(mp_nodes) == 2
    # model.save("cnv-pre-compile.onnx")
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    # model.save("cnv-post-compile.onnx")
    produced_ctx = oxe.execute_onnx(model, input_dict, True)
    produced = produced_ctx[model.graph.output[0].name]
    assert np.isclose(expected, produced, atol=1e-3).all()
    # the test image belongs to class 3
    assert np.argmax(produced) == 3
    os.remove(export_onnx_path_cnv)
def test_convert_to_hls_layers_tfc_w1a2():
    """Export the trained TFC-w1a2 net, streamline it, convert its MatMuls
    to StreamingFCLayer_Batch HLS nodes, and verify cppsim output against
    a freshly streamlined (non-HLS) copy of the same export."""
    tfc = get_test_model_trained("TFC", 1, 2)
    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    # tidy-up and streamlining, applied in order
    prep_transforms = [
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        Streamline(),
    ]
    for xform in prep_transforms:
        model = model.transform(xform)
    from finn.transformation.fpgadataflow.convert_to_hls_layers import (
        InferQuantizedStreamingFCLayer,
    )

    model = model.transform(InferQuantizedStreamingFCLayer())
    # per layer: (graph node index, expected input tensor shapes, SIMD, PE)
    layer_specs = [
        (2, [[1, 784], [784, 64], [64, 2]], 784, 16),
        (3, [[1, 64], [64, 64], [64, 2]], 16, 16),
        (4, [[1, 64], [64, 64], [64, 2]], 16, 16),
        (5, [[1, 64], [64, 10]], 16, 10),
    ]
    for node_idx, exp_in_shapes, simd, pe in layer_specs:
        fc_node = model.graph.node[node_idx]
        assert fc_node.op_type == "StreamingFCLayer_Batch"
        for tensor_idx, exp_shape in enumerate(exp_in_shapes):
            assert model.get_tensor_shape(fc_node.input[tensor_idx]) == exp_shape
        fc_op = getCustomOp(fc_node)
        fc_op.set_nodeattr("SIMD", simd)
        fc_op.set_nodeattr("PE", pe)
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    # run using FINN-based execution
    input_dict = {"global_in": nph.to_array(input_tensor)}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced = output_dict[model.graph.output[0].name]
    # rebuild a streamlined-only golden model from the same export
    model = ModelWrapper(export_onnx_path)
    for xform in [
        InferShapes(),
        FoldConstants(),
        GiveUniqueNodeNames(),
        GiveReadableTensorNames(),
        Streamline(),
    ]:
        model = model.transform(xform)
    golden_output_dict = oxe.execute_onnx(model, input_dict, True)
    expected = golden_output_dict[model.graph.output[0].name]
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
    """Test the Thresholding_Batch HLS node against the numpy
    multithreshold reference, in cppsim or rtlsim.

    nf is the neuron folding factor (-1 = no folding); act is the output
    DataType; mem_mode selects the threshold memory style.
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))
    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
    # threshold of first channel is zero, while using BIPOLAR output)
    if act == DataType["BIPOLAR"]:
        T[0][0] = 0
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)
    if odt == DataType["BIPOLAR"]:
        actval = 0
    else:
        actval = odt.min()
    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode)
    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")
    # package input data as dictionary
    input_dict = {"inp": x}
    y = multithreshold(x, T)
    if act == DataType["BIPOLAR"]:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()
    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)
    # fix: failure message now names the actual exec_mode instead of
    # unconditionally claiming "cppsim failed" in rtlsim runs too
    assert (y_produced == y_expected).all(), "%s failed" % exec_mode
    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est
        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_convert_to_hls_layers_synthetic(ch, ifmdim, idt):
    """Build a synthetic model via make_model, verify its numerical output
    against a closed-form expectation, convert it to HLS layers
    (channelwise ops, add-streams, global accumulate-pool, top-k /
    label-select, duplicate-streams), check node counts, and verify the
    cppsim top-k output.
    """
    model = make_model(ch, ifmdim)
    model.save(export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataLayouts())
    # model.save("golden.onnx")
    # generate test vectors of correct shape
    if ifmdim == -1:
        input_tensor_shape = (1, ch)
    else:
        input_tensor_shape = (1, ch, ifmdim, ifmdim)
    x = gen_finn_dt_tensor(idt, input_tensor_shape)
    # generate expected value from streamlined net
    input_dict = {model.graph.input[0].name: x}
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_sum = output_dict[model.graph.output[0].name]
    # fix: removed dead store `chw_mul = model.get_initializer(...)` -- its
    # value was immediately overwritten by the constant 1 below
    chw_mul = 1
    # NOTE(review): axis=(2, 3) assumes a 4D input, i.e. ifmdim != -1 in the
    # parametrization -- confirm if the 2D case is ever enabled
    expected_sum = chw_mul * np.sum(2 * (2 * x + 15.0), axis=(2, 3)) / (ifmdim * ifmdim)
    assert (produced_sum.flatten() == expected_sum.flatten()).all()
    model = model.transform(InferDataLayouts())
    # convert to hls
    model.set_tensor_datatype(model.graph.input[0].name, idt)
    # extra streamlining
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(MoveAddPastMul())
    model = model.transform(CollapseRepeatedMul())
    model = model.transform(CollapseRepeatedAdd())
    # insert top-k node, which should absorb linear ops before it
    model = model.transform(InferShapes())
    model = model.transform(InferDataLayouts())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferChannelwiseLinearLayer())
    model = model.transform(to_hls.InferAddStreamsLayer())
    model = model.transform(to_hls.InferGlobalAccPoolLayer())
    model = model.transform(MoveScalarLinearPastInvariants())
    model = model.transform(InsertTopK())
    model = model.transform(AbsorbScalarMulAddIntoTopK())
    model = model.transform(InferDataTypes())
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(AbsorbConsecutiveTransposes())
    model = model.transform(InferDataTypes())
    # NOTE(review): second InferLabelSelectLayer pass -- presumably to catch
    # TopK nodes exposed by the transpose absorption above; confirm
    model = model.transform(to_hls.InferLabelSelectLayer())
    model = model.transform(to_hls.InferDuplicateStreamsLayer())
    model = model.transform(SortGraph())
    # model.save("golden_hls.onnx")
    # check topology status
    finn_nodes = model.get_finn_nodes()
    assert len(finn_nodes) == 9
    add_nodes = model.get_nodes_by_op_type("AddStreams_Batch")
    assert len(add_nodes) == 1
    pool_nodes = model.get_nodes_by_op_type("GlobalAccPool_Batch")
    assert len(pool_nodes) == 1
    label_nodes = model.get_nodes_by_op_type("LabelSelect_Batch")
    assert len(label_nodes) == 1
    channelwise_nodes = model.get_nodes_by_op_type("ChannelwiseOp_Batch")
    assert len(channelwise_nodes) == 5
    dup_nodes = model.get_nodes_by_op_type("DuplicateStreams_Batch")
    assert len(dup_nodes) == 1
    model = model.transform(PrepareCppSim())
    model = model.transform(CompileCppSim())
    model = model.transform(SetExecMode("cppsim"))
    output_dict = oxe.execute_onnx(model, input_dict, True)
    produced_topk_hls = output_dict[model.graph.output[0].name]
    topk_input = output_dict[model.graph.node[-1].input[0]]
    assert soft_verify_topk(topk_input, produced_topk_hls, 5)
    os.remove(export_onnx_path)