def fpga_single_run(self, input):
    """Push one input through the accelerator and return the final output.

    Steps, in order:
      1. reshape ``input`` to ``self.ishape_normal`` and apply
         ``MT.multithreshold`` with ``self.mt_node_thresholds``;
      2. fold to ``self.ishape_folded``, pack to bytes and copy into
         ``self.ibuf_packed_device``;
      3. start the send/receive DMA transfers and wait for both;
      4. unpack ``self.obuf_packed``, reshape to ``self.oshape_normal``,
         multiply by ``self.multiply_node_const`` and add
         ``self.add_node_mat``.

    Returns the resulting array.
    """
    normal_in = input.reshape(self.ishape_normal)
    thresholded = MT.multithreshold(normal_in, self.mt_node_thresholds)
    assert thresholded.shape == self.ishape_normal
    folded_in = thresholded.reshape(self.ishape_folded)

    # pack the input buffer, reversing both SIMD dim and endianness
    packed_in = finnpy_to_packed_bytearray(
        folded_in, self.idt, reverse_endian=True, reverse_inner=True
    )
    # copy the packed data into the PYNQ buffer
    # TODO optimization: pack directly into the PYNQ buffer?
    np.copyto(self.ibuf_packed_device, packed_in)

    # kick off both DMA directions, then block until each has finished
    dma = self.dma
    dma.sendchannel.transfer(self.ibuf_packed_device)
    dma.recvchannel.transfer(self.obuf_packed)
    dma.sendchannel.wait()
    dma.recvchannel.wait()

    # unpack the packed output buffer from accelerator
    folded_out = packed_bytearray_to_finnpy(
        self.obuf_packed,
        self.odt,
        self.oshape_folded,
        reverse_endian=True,
        reverse_inner=True,
    )
    result = folded_out.reshape(self.oshape_normal)
    result = result * self.multiply_node_const
    result = result + self.add_node_mat
    return result
def test_fpgadataflow_fclayer_npysim(idt, wdt, act, nf, sf, mw, mh):
    """Compare a single FC layer run in "npysim" mode with a NumPy reference.

    Args:
        idt: datatype used to generate the (1, mw) input.
        wdt: datatype used to generate the (mw, mh) weights.
        act: output datatype when thresholds are used, or None to produce
            raw accumulators without thresholding.
        nf: fold factor dividing ``mh``; -1 is replaced by ``mh``.
        sf: fold factor dividing ``mw``; -1 is replaced by ``mw``.
        mw: number of weight-matrix rows (input width).
        mh: number of weight-matrix columns (output width).
    """
    # -1 is a placeholder that resolves to the full dimension
    nf = mh if nf == -1 else nf
    sf = mw if sf == -1 else sf
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0

    both_bipolar = wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR

    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))

    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        odt = DataType.UINT32 if both_bipolar else DataType.INT32
    else:
        odt = act
        acc_lo, acc_hi = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        raw_thresholds = np.random.randint(acc_lo, acc_hi - 1, (mh, n_steps))
        # provide non-decreasing thresholds
        T = np.sort(raw_thresholds.astype(np.float32), axis=1)
        if both_bipolar:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32

    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    model = (
        model.transform(SetExecMode("npysim"))
        .transform(CodeGen_npysim())
        .transform(Compile())
    )

    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)

    # reference result computed directly with NumPy helpers
    if both_bipolar:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    y_expected = y.reshape(model.get_tensor_shape("outp"))

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    matches = y_produced.reshape(y_expected.shape) == y_expected
    assert matches.all(), "npysim failed"
def test_execute_multi_thresholding():
    """Check ``multithreshold`` against a hand-written expected result.

    A (6, 3, 2, 2) input is thresholded with a (3, 7) threshold matrix and
    compared element-wise with the stored expectation. The call is then
    repeated with the extra arguments ``2.0, -1.0`` and compared against
    ``2.0 * outputs - 1.0``.
    """
    data_shape = (6, 3, 2, 2)

    inputs = np.array(
        [
            4.8, 3.2, 1.2, 4.9, 7.8, 2.4, 3.1, 4.7, 6.2, 5.1, 4.9, 2.2,
            6.2, 0.0, 0.8, 4.7, 0.2, 5.6, 8.9, 9.2, 9.1, 4.0, 3.3, 4.9,
            2.3, 1.7, 1.3, 2.2, 4.6, 3.4, 3.7, 9.8, 4.7, 4.9, 2.8, 2.7,
            8.3, 6.7, 4.2, 7.1, 2.8, 3.1, 0.8, 0.6, 4.4, 2.7, 6.3, 6.1,
            1.4, 5.3, 2.3, 1.9, 4.7, 8.1, 9.3, 3.7, 2.7, 5.1, 4.2, 1.8,
            4.1, 7.3, 7.1, 0.4, 0.2, 1.3, 4.3, 8.9, 1.4, 1.6, 8.3, 9.4,
        ],
        dtype=np.float64,
    ).reshape(data_shape)

    # one row of seven thresholds per channel (second axis of the input)
    thresholds = np.array(
        [
            [0.8, 1.4, 1.7, 3.5, 5.2, 6.8, 8.2],
            [0.2, 2.2, 3.5, 4.5, 6.6, 8.6, 9.2],
            [1.3, 4.1, 4.5, 6.5, 7.8, 8.1, 8.9],
        ],
        dtype=np.float64,
    )

    outputs = np.array(
        [
            4.0, 3.0, 1.0, 4.0, 5.0, 2.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0,
            5.0, 0.0, 1.0, 4.0, 1.0, 4.0, 6.0, 7.0, 7.0, 1.0, 1.0, 3.0,
            3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 7.0, 3.0, 3.0, 1.0, 1.0,
            7.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0,
            2.0, 5.0, 3.0, 3.0, 4.0, 5.0, 7.0, 3.0, 1.0, 3.0, 2.0, 1.0,
            4.0, 6.0, 6.0, 0.0, 1.0, 1.0, 3.0, 6.0, 1.0, 1.0, 6.0, 7.0,
        ],
        dtype=np.float64,
    ).reshape(data_shape)

    # plain thresholding
    plain = multithreshold(inputs, thresholds)
    assert (plain == outputs).all()

    # thresholding with the two extra arguments 2.0 and -1.0
    scaled = multithreshold(inputs, thresholds, 2.0, -1.0)
    expected_scaled = 2.0 * outputs - 1.0
    assert (scaled == expected_scaled).all()
def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
    mem_mode, idt, wdt, act, nf, sf, mw, mh
):
    """Run a single FC layer through rtlsim and validate output and cycles.

    The "mem_mode" attribute of every node in the generated model is set to
    ``mem_mode`` before the model is prepared for rtlsim. After execution
    the output is compared against a NumPy reference, the HLS synthesis
    resource estimate must contain "StreamingFCLayer_Batch_0", and the
    expected cycle count must be non-zero and within 15 of the
    "cycles_rtlsim" attribute recorded on the node.

    Args:
        mem_mode: value written to each node's "mem_mode" attribute.
        idt: datatype used to generate the (1, mw) input.
        wdt: datatype used to generate the (mw, mh) weights.
        act: output datatype when thresholds are used, or None to produce
            raw accumulators without thresholding.
        nf: fold factor dividing ``mh``; -1 is replaced by ``mh``.
        sf: fold factor dividing ``mw``; -1 is replaced by ``mw``.
        mw: number of weight-matrix rows (input width).
        mh: number of weight-matrix columns (output width).
    """
    # -1 is a placeholder that resolves to the full dimension
    nf = mh if nf == -1 else nf
    sf = mw if sf == -1 else sf
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0

    both_bipolar = wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR

    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))

    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        odt = DataType.UINT32 if both_bipolar else DataType.INT32
    else:
        odt = act
        acc_lo, acc_hi = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        raw_thresholds = np.random.randint(acc_lo, acc_hi - 1, (mh, n_steps))
        # provide non-decreasing thresholds
        T = np.sort(raw_thresholds.astype(np.float32), axis=1)
        if both_bipolar:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32

    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    for graph_node in model.graph.node:
        # lookup op_type in registry of CustomOps
        getCustomOp(graph_node).set_nodeattr("mem_mode", mem_mode)

    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)

    # reference result computed directly with NumPy helpers
    if both_bipolar:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    y_expected = y.reshape(model.get_tensor_shape("outp"))

    # TODO split up into several dependent tests -- need to check how this
    # works for parametrized tests...
    model = (
        model.transform(SetExecMode("rtlsim"))
        .transform(GiveUniqueNodeNames())
        .transform(PrepareIP("xc7z020clg400-1", 5))
        .transform(HLSSynthIP())
        .transform(PrepareRTLSim())
    )
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    matches = y_produced.reshape(y_expected.shape) == y_expected
    assert matches.all(), "rtlsim failed"

    # resource estimate must list the (uniquely named) FC layer
    hls_synt_res_est = model.analysis(hls_synth_res_estimation)
    assert "StreamingFCLayer_Batch_0" in hls_synt_res_est

    # simulated cycle count must agree with the analytical expectation
    fc_node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
    cycles_rtlsim = getCustomOp(fc_node).get_nodeattr("cycles_rtlsim")
    exp_cycles = model.analysis(exp_cycles_per_layer)[fc_node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
    assert exp_cycles != 0
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode):
    """Check a single thresholding layer in cppsim or rtlsim mode.

    Random sorted thresholds are generated, a single-node model is built
    and prepared for the requested execution mode, and its output is
    compared with a reference computed by ``multithreshold``. In rtlsim
    mode the HLS resource estimate and the cycle count are checked too.

    Args:
        idt: datatype used to generate the (1, ich) input and to bound the
            random thresholds (``idt.min()`` .. ``idt.max()``).
        act: output datatype of the thresholding layer.
        nf: fold factor dividing ``ich``; -1 is replaced by ``ich``.
        ich: number of input channels.
        exec_mode: either "cppsim" or "rtlsim".

    Raises:
        Exception: if ``exec_mode`` is neither "cppsim" nor "rtlsim".
    """
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType.BIPOLAR:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    # reference result
    y = multithreshold(x, T)
    if act == DataType.BIPOLAR:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()

    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)

    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    y_produced = y_produced.reshape(y_expected.shape)

    # report the mode that actually ran; exec_mode is known to be one of
    # "cppsim" / "rtlsim" here because anything else raised above
    assert (y_produced == y_expected).all(), exec_mode + " failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est

        # simulated cycle count must agree with the analytical expectation
        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
def test_multithreshold():
    """Check ``multithreshold`` for correctness and time it.

    First, a fixed (6, 3, 2, 2) input is thresholded with a (3, 7)
    threshold matrix and compared with a stored expectation, both plain
    and with the extra arguments ``2.0, -1.0`` (expected to equal
    ``2.0 * outputs - 1.0``).

    Second, a seeded random (1, 256, 64, 64) input is run through both
    ``multithreshold`` and ``multithreshold_elementwise``; the two results
    must be identical.

    Returns:
        A tuple ``(vector_runtime, non_vector_runtime)`` with the elapsed
        seconds of the ``multithreshold`` and
        ``multithreshold_elementwise`` calls on the random input.
    """
    inputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array(
            [
                4.8, 3.2, 1.2, 4.9, 7.8, 2.4, 3.1, 4.7, 6.2, 5.1, 4.9, 2.2,
                6.2, 0.0, 0.8, 4.7, 0.2, 5.6, 8.9, 9.2, 9.1, 4.0, 3.3, 4.9,
                2.3, 1.7, 1.3, 2.2, 4.6, 3.4, 3.7, 9.8, 4.7, 4.9, 2.8, 2.7,
                8.3, 6.7, 4.2, 7.1, 2.8, 3.1, 0.8, 0.6, 4.4, 2.7, 6.3, 6.1,
                1.4, 5.3, 2.3, 1.9, 4.7, 8.1, 9.3, 3.7, 2.7, 5.1, 4.2, 1.8,
                4.1, 7.3, 7.1, 0.4, 0.2, 1.3, 4.3, 8.9, 1.4, 1.6, 8.3, 9.4,
            ]
        ),
    )

    thresholds = np.ndarray(
        shape=(3, 7),
        buffer=np.array(
            [
                0.8, 1.4, 1.7, 3.5, 5.2, 6.8, 8.2,
                0.2, 2.2, 3.5, 4.5, 6.6, 8.6, 9.2,
                1.3, 4.1, 4.5, 6.5, 7.8, 8.1, 8.9,
            ]
        ),
    )

    outputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array(
            [
                4.0, 3.0, 1.0, 4.0, 5.0, 2.0, 2.0, 4.0, 3.0, 3.0, 3.0, 1.0,
                5.0, 0.0, 1.0, 4.0, 1.0, 4.0, 6.0, 7.0, 7.0, 1.0, 1.0, 3.0,
                3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 3.0, 7.0, 3.0, 3.0, 1.0, 1.0,
                7.0, 5.0, 4.0, 6.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 3.0,
                2.0, 5.0, 3.0, 3.0, 4.0, 5.0, 7.0, 3.0, 1.0, 3.0, 2.0, 1.0,
                4.0, 6.0, 6.0, 0.0, 1.0, 1.0, 3.0, 6.0, 1.0, 1.0, 6.0, 7.0,
            ]
        ),
    )

    results = multithreshold(inputs, thresholds)
    assert (results == outputs).all()

    results_scaled = multithreshold(inputs, thresholds, 2.0, -1.0)
    outputs_scaled = 2.0 * outputs - 1.0
    assert (results_scaled == outputs_scaled).all()

    # performance and random test
    np.random.seed(0)
    inputs = np.random.random((1, 256, 64, 64))
    thresholds = (np.array([[1, 2, 3, 4, 5, 6]]) - 0.5) / 6

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump with system clock
    # adjustments and may be too coarse for a fast vectorized call
    before = time.perf_counter()
    vec_results = multithreshold(inputs, thresholds)
    after = time.perf_counter()
    vector_runtime = after - before

    before = time.perf_counter()
    nonvec_results = multithreshold_elementwise(inputs, thresholds)
    after = time.perf_counter()
    non_vector_runtime = after - before

    assert (vec_results == nonvec_results).all()

    return vector_runtime, non_vector_runtime