示例#1
0
    def fpga_single_run(self, input):

        input = input.reshape(self.ishape_normal)
        input = MT.multithreshold(input, self.mt_node_thresholds)
        assert input.shape == self.ishape_normal
        ibuf_folded = input.reshape(self.ishape_folded)

        # pack the input buffer, reversing both SIMD dim and endianness
        ibuf_packed = finnpy_to_packed_bytearray(ibuf_folded,
                                                 self.idt,
                                                 reverse_endian=True,
                                                 reverse_inner=True)
        # copy the packed data into the PYNQ buffer
        # TODO optimization: pack directly into the PYNQ buffer?
        np.copyto(self.ibuf_packed_device, ibuf_packed)

        # set up the DMA and wait until all transfers complete
        self.dma.sendchannel.transfer(self.ibuf_packed_device)
        self.dma.recvchannel.transfer(self.obuf_packed)
        self.dma.sendchannel.wait()
        self.dma.recvchannel.wait()

        # unpack the packed output buffer from accelerator
        obuf_folded = packed_bytearray_to_finnpy(self.obuf_packed,
                                                 self.odt,
                                                 self.oshape_folded,
                                                 reverse_endian=True,
                                                 reverse_inner=True)

        obuf_normal = obuf_folded.reshape(self.oshape_normal)
        obuf_normal = obuf_normal * self.multiply_node_const
        obuf_normal = obuf_normal + self.add_node_mat
        return obuf_normal
示例#2
0
def test_fpgadataflow_fclayer_npysim(idt, wdt, act, nf, sf, mw, mh):
    if nf == -1:
        nf = mh
    if sf == -1:
        sf = mw
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0
    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))
    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            odt = DataType.UINT32
        else:
            odt = DataType.INT32
    else:
        odt = act
        (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32)
        # provide non-decreasing thresholds
        T = np.sort(T, axis=1)
        # generate thresholds for activation
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32
    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    model = model.transform(SetExecMode("npysim"))
    model = model.transform(CodeGen_npysim())
    model = model.transform(Compile())
    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)
    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "npysim failed"
示例#3
0
def test_execute_multi_thresholding():

    inputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.8,
            3.2,
            1.2,
            4.9,
            7.8,
            2.4,
            3.1,
            4.7,
            6.2,
            5.1,
            4.9,
            2.2,
            6.2,
            0.0,
            0.8,
            4.7,
            0.2,
            5.6,
            8.9,
            9.2,
            9.1,
            4.0,
            3.3,
            4.9,
            2.3,
            1.7,
            1.3,
            2.2,
            4.6,
            3.4,
            3.7,
            9.8,
            4.7,
            4.9,
            2.8,
            2.7,
            8.3,
            6.7,
            4.2,
            7.1,
            2.8,
            3.1,
            0.8,
            0.6,
            4.4,
            2.7,
            6.3,
            6.1,
            1.4,
            5.3,
            2.3,
            1.9,
            4.7,
            8.1,
            9.3,
            3.7,
            2.7,
            5.1,
            4.2,
            1.8,
            4.1,
            7.3,
            7.1,
            0.4,
            0.2,
            1.3,
            4.3,
            8.9,
            1.4,
            1.6,
            8.3,
            9.4,
        ]),
    )

    thresholds = np.ndarray(
        shape=(3, 7),
        buffer=np.array([
            0.8,
            1.4,
            1.7,
            3.5,
            5.2,
            6.8,
            8.2,
            0.2,
            2.2,
            3.5,
            4.5,
            6.6,
            8.6,
            9.2,
            1.3,
            4.1,
            4.5,
            6.5,
            7.8,
            8.1,
            8.9,
        ]),
    )

    outputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.0,
            3.0,
            1.0,
            4.0,
            5.0,
            2.0,
            2.0,
            4.0,
            3.0,
            3.0,
            3.0,
            1.0,
            5.0,
            0.0,
            1.0,
            4.0,
            1.0,
            4.0,
            6.0,
            7.0,
            7.0,
            1.0,
            1.0,
            3.0,
            3.0,
            3.0,
            1.0,
            3.0,
            4.0,
            2.0,
            3.0,
            7.0,
            3.0,
            3.0,
            1.0,
            1.0,
            7.0,
            5.0,
            4.0,
            6.0,
            2.0,
            2.0,
            1.0,
            1.0,
            2.0,
            1.0,
            3.0,
            3.0,
            2.0,
            5.0,
            3.0,
            3.0,
            4.0,
            5.0,
            7.0,
            3.0,
            1.0,
            3.0,
            2.0,
            1.0,
            4.0,
            6.0,
            6.0,
            0.0,
            1.0,
            1.0,
            3.0,
            6.0,
            1.0,
            1.0,
            6.0,
            7.0,
        ]),
    )

    results = multithreshold(inputs, thresholds)

    assert (results == outputs).all()

    results_scaled = multithreshold(inputs, thresholds, 2.0, -1.0)
    outputs_scaled = 2.0 * outputs - 1.0
    assert (results_scaled == outputs_scaled).all()
示例#4
0
def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
    mem_mode, idt, wdt, act, nf, sf, mw, mh
):
    if nf == -1:
        nf = mh
    if sf == -1:
        sf = mw
    pe = mh // nf
    simd = mw // sf
    assert mh % pe == 0
    assert mw % sf == 0
    # generate weights
    W = gen_finn_dt_tensor(wdt, (mw, mh))
    # generate input data
    x = gen_finn_dt_tensor(idt, (1, mw))
    if act is None:
        # no activation, produce accumulators
        T = None
        tdt = None
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            odt = DataType.UINT32
        else:
            odt = DataType.INT32
    else:
        odt = act
        (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
        n_steps = act.get_num_possible_values() - 1
        T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32)
        # provide non-decreasing thresholds
        T = np.sort(T, axis=1)
        # generate thresholds for activation
        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
            tdt = DataType.UINT32
            # bias thresholds to be positive
            T = np.ceil((T + mw) / 2)
            assert (T >= 0).all()
        else:
            tdt = DataType.INT32
    model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
    for node in model.graph.node:
        # lookup op_type in registry of CustomOps
        inst = getCustomOp(node)
        inst.set_nodeattr("mem_mode", mem_mode)

    # prepare input data
    input_dict = prepare_inputs(x, idt, wdt)
    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
        # convert inputs to binary and use xnorpopcountmatmul
        y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
    else:
        y = np.matmul(x, W)
    if T is not None:
        y = multithreshold(y, T)
        if act == DataType.BIPOLAR:
            # binary to bipolar
            y = 2 * y - 1
        else:
            # signed offset
            y += act.min()
    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # TODO split up into several dependent tests -- need to check how this
    # works for parametrized tests...
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    assert (y_produced.reshape(y_expected.shape) == y_expected).all(), "rtlsim failed"

    hls_synt_res_est = model.analysis(hls_synth_res_estimation)
    assert "StreamingFCLayer_Batch_0" in hls_synt_res_est

    node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
    inst = getCustomOp(node)
    cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
    exp_cycles_dict = model.analysis(exp_cycles_per_layer)
    exp_cycles = exp_cycles_dict[node.name]
    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
    assert exp_cycles != 0
示例#5
0
def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode):
    if nf == -1:
        nf = ich
    pe = ich // nf
    assert ich % pe == 0

    # generate input data
    x = gen_finn_dt_tensor(idt, (1, ich))

    odt = act
    n_steps = act.get_num_possible_values() - 1
    T = np.random.randint(idt.min(),
                          idt.max() + 1, (ich, n_steps)).astype(np.float32)
    # provide non-decreasing thresholds
    T = np.sort(T, axis=1)

    if odt == DataType.BIPOLAR:
        actval = 0
    else:
        actval = odt.min()

    model = make_single_thresholding_modelwrapper(T, pe, idt, odt, actval)

    if exec_mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model = model.transform(SetExecMode("cppsim"))
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    else:
        raise Exception("Unknown exec_mode")

    # package input data as dictionary
    input_dict = {"inp": x}

    y = multithreshold(x, T)
    if act == DataType.BIPOLAR:
        # binary to bipolar
        y = 2 * y - 1
    else:
        # signed offset
        y += act.min()

    oshape = model.get_tensor_shape("outp")
    y_expected = y.reshape(oshape)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]

    y_produced = y_produced.reshape(y_expected.shape)

    assert (y_produced == y_expected).all(), "cppsim failed"

    if exec_mode == "rtlsim":
        hls_synt_res_est = model.analysis(hls_synth_res_estimation)
        assert "Thresholding_Batch_0" in hls_synt_res_est

        node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0
示例#6
0
def test_multithreshold():

    inputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.8,
            3.2,
            1.2,
            4.9,
            7.8,
            2.4,
            3.1,
            4.7,
            6.2,
            5.1,
            4.9,
            2.2,
            6.2,
            0.0,
            0.8,
            4.7,
            0.2,
            5.6,
            8.9,
            9.2,
            9.1,
            4.0,
            3.3,
            4.9,
            2.3,
            1.7,
            1.3,
            2.2,
            4.6,
            3.4,
            3.7,
            9.8,
            4.7,
            4.9,
            2.8,
            2.7,
            8.3,
            6.7,
            4.2,
            7.1,
            2.8,
            3.1,
            0.8,
            0.6,
            4.4,
            2.7,
            6.3,
            6.1,
            1.4,
            5.3,
            2.3,
            1.9,
            4.7,
            8.1,
            9.3,
            3.7,
            2.7,
            5.1,
            4.2,
            1.8,
            4.1,
            7.3,
            7.1,
            0.4,
            0.2,
            1.3,
            4.3,
            8.9,
            1.4,
            1.6,
            8.3,
            9.4,
        ]),
    )

    thresholds = np.ndarray(
        shape=(3, 7),
        buffer=np.array([
            0.8,
            1.4,
            1.7,
            3.5,
            5.2,
            6.8,
            8.2,
            0.2,
            2.2,
            3.5,
            4.5,
            6.6,
            8.6,
            9.2,
            1.3,
            4.1,
            4.5,
            6.5,
            7.8,
            8.1,
            8.9,
        ]),
    )

    outputs = np.ndarray(
        shape=(6, 3, 2, 2),
        buffer=np.array([
            4.0,
            3.0,
            1.0,
            4.0,
            5.0,
            2.0,
            2.0,
            4.0,
            3.0,
            3.0,
            3.0,
            1.0,
            5.0,
            0.0,
            1.0,
            4.0,
            1.0,
            4.0,
            6.0,
            7.0,
            7.0,
            1.0,
            1.0,
            3.0,
            3.0,
            3.0,
            1.0,
            3.0,
            4.0,
            2.0,
            3.0,
            7.0,
            3.0,
            3.0,
            1.0,
            1.0,
            7.0,
            5.0,
            4.0,
            6.0,
            2.0,
            2.0,
            1.0,
            1.0,
            2.0,
            1.0,
            3.0,
            3.0,
            2.0,
            5.0,
            3.0,
            3.0,
            4.0,
            5.0,
            7.0,
            3.0,
            1.0,
            3.0,
            2.0,
            1.0,
            4.0,
            6.0,
            6.0,
            0.0,
            1.0,
            1.0,
            3.0,
            6.0,
            1.0,
            1.0,
            6.0,
            7.0,
        ]),
    )

    results = multithreshold(inputs, thresholds)
    assert (results == outputs).all()

    results_scaled = multithreshold(inputs, thresholds, 2.0, -1.0)
    outputs_scaled = 2.0 * outputs - 1.0
    assert (results_scaled == outputs_scaled).all()

    # performance and random test
    np.random.seed(0)
    inputs = np.random.random((1, 256, 64, 64))
    thresholds = (np.array([[1, 2, 3, 4, 5, 6]]) - 0.5) / 6

    before = time.time()
    vec_results = multithreshold(inputs, thresholds)
    after = time.time()
    vector_runtime = after - before

    before = time.time()
    nonvec_results = multithreshold_elementwise(inputs, thresholds)
    after = time.time()
    non_vector_runtime = after - before

    assert (vec_results == nonvec_results).all()

    return vector_runtime, non_vector_runtime