예제 #1
0
def fpga_process(qin, qout):
    """Worker loop: run every batch received on ``qin`` through the FPGA.

    Each item taken from ``qin`` is prepared with
    ``pyxfdnn.prepareInputsForFpga`` and run with ``pyxfdnn.execute``; after
    each run ``config["g_fpgaOutput"]`` is put on ``qout``.  The loop ends when
    ``None`` is received or when input preparation returns a falsy value.

    A ``None`` sentinel is always put on ``qout`` and the handle is always
    closed on the way out, including when preparation or execution raises, so
    a consumer blocked on ``qout`` is not left waiting forever.  The exception
    itself still propagates.

    Args:
        qin: queue-like object; ``get()`` yields input batches, ``None`` stops
            the loop.
        qout: queue-like object; receives one output per processed batch,
            followed by ``None``.
    """
    init_fpga()
    try:
        while True:
            inputs = qin.get()
            if inputs is None:
                break

            fpgaInputs = pyxfdnn.prepareInputsForFpga(
                inputs, config["quantizecfg"], config["scaleB"], -1,
                config["firstfpgalayer"])
            if not fpgaInputs:
                break

            pyxfdnn.execute(
                config["fpgacommands"],
                config["weightsBlob"],
                fpgaInputs,
                config["g_fpgaOutput"],
                g_batchSize,
                config["quantizecfg"],
                config["scaleB"],
            )

            # The same config["g_fpgaOutput"] object is queued on every pass.
            qout.put(config["g_fpgaOutput"])
    finally:
        # Signal end-of-stream first, then release the FPGA handle.
        qout.put(None)
        pyxfdnn.closeHandle()
예제 #2
0
    def __exit__(self, *a):
        """Leave the context: stop, wait for both workers, release the handle.

        Calls ``self.stop()``, joins ``self.proc_fpga`` and then
        ``self.proc_bbox``, and calls ``xdnn.closeHandle()`` only when
        ``self.xdnn_handle`` is truthy.  The exception details supplied by the
        ``with`` statement are ignored and ``None`` is returned.
        """
        self.stop()
        self.proc_fpga.join()
        self.proc_bbox.join()

        # Nothing to release when no handle is recorded.
        if not self.xdnn_handle:
            return
        xdnn.closeHandle()
예제 #3
0
def main():
    """Classify the images named on the command line, one batch at a time.

    Parses the command line, creates the FPGA handle (exiting with status 1
    when ``xdnn.createHandle`` reports a non-zero code), then for each batch of
    ``args['batch_sz']`` image paths: loads the images into the first FPGA
    input buffer, executes the network, applies the fully connected layer and
    softmax, and prints the classification.  When ``args['golden']`` is set,
    Top-1 / Top-5 hits are counted against the golden map and an accuracy
    summary is printed after the handle is closed.
    """
    args = xdnn_io.processCommandLine()

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    fpgaRT = xdnn.XDNNFPGAOp(handles, args)
    fpgaOutput = fpgaRT.getOutputs()
    fpgaInput = fpgaRT.getInputs()

    fcWeight, fcBias = xdnn_io.loadFCWeightsBias(args)
    img_paths = xdnn_io.getFilePaths(args['images'])
    batch_sz = args['batch_sz']
    fcOutput = np.empty((batch_sz, args['outsz']),
                        dtype=np.float32,
                        order='C')

    # Batch dimension followed by the remaining dimensions of the first input
    # descriptor; indices 2 and 3 are passed to the image loader below.
    inShape = (batch_sz, ) + tuple(
        tuple(fpgaRT.getInputDescriptors().values())[0][1:])

    labels = xdnn_io.get_labels(args['labels'])
    if args['golden']:
        goldenMap = xdnn_io.getGoldenMap(args['golden'])
        top5Count = 0
        top1Count = 0

    # Only the first input and first output buffers are used.
    firstInput = list(fpgaInput.values())[0]
    firstOutput = list(fpgaOutput.values())[0]

    for i in range(0, len(img_paths), batch_sz):
        pl = []
        for j, p in enumerate(img_paths[i:i + batch_sz]):
            # Written in place into the buffer that fpgaInput also holds.
            firstInput[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                p, args['img_raw_scale'], args['img_mean'],
                args['img_input_scale'], inShape[2], inShape[3])
            pl.append(p)

        fpgaRT.execute(fpgaInput, fpgaOutput)
        xdnn.computeFC(fcWeight, fcBias, firstOutput, fcOutput)
        softmaxOut = xdnn.computeSoftmax(fcOutput)
        xdnn_io.printClassification(softmaxOut, pl, labels)
        if args['golden']:
            # pl holds exactly the paths of this batch, in order.
            for j, p in enumerate(pl):
                top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                            labels, 1)
                top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                            labels, 5)

    xdnn.closeHandle()
    # Skip the summary when there were no images: the averages would divide
    # by zero.
    if args['golden'] and len(img_paths) > 0:
        total = float(len(img_paths))
        # The % formatting must be applied to the string, inside print(...).
        print("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n" % (
            len(img_paths), float(top1Count) / total * 100.,
            float(top5Count) / total * 100.))
예제 #4
0
def main(argv):
    """Run one batch through each configured network using async streams.

    Parses ``argv``, creates the FPGA handle (exiting with status 1 when
    ``xdnn.createHandle`` reports a non-zero code), and builds one
    ``xdnn.XDNNFPGAOp`` per entry of ``args['jsoncfg']``.  For every entry a
    single batch of images is loaded and submitted with ``exec_async`` on its
    own stream id; once all submissions are made the results are collected
    with ``get_result``, then the fully connected layer and softmax are
    applied and the classification is printed.

    Args:
        argv: argument list forwarded to ``xdnn_io.processCommandLine``.
    """
    args = xdnn_io.processCommandLine(argv)
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)
    labels = xdnn_io.get_labels(args['labels'])

    # TODO dict of tuples instead?
    fpgaRT = {}
    fpgaOutputs = {}
    fcWeights = {}
    fcBiases = {}
    netFiles = {}
    confNames = []

    args = args['jsoncfg']  # we do not use other args' keys
    for netconf_args in args:

        confName = str(netconf_args['name'])
        confNames.append(confName)
        fpgaRT[confName] = xdnn.XDNNFPGAOp(handles, netconf_args)
        # next(iter(...)) picks the first entry on both Python 2 and 3.
        firstInputDesc = next(
            iter(fpgaRT[confName].getInputDescriptors().values()))
        netconf_args['in_shape'] = (netconf_args['batch_sz'], ) + tuple(
            firstInputDesc[1:])
        (fcWeights[confName],
         fcBiases[confName]) = xdnn_io.loadFCWeightsBias(netconf_args)
        fpgaOutputs[confName] = np.empty((
            netconf_args['batch_sz'],
            int(netconf_args['fpgaoutsz']),
        ),
                                         dtype=np.float32,
                                         order='C')
        netFiles[confName] = str(netconf_args['netcfg'])

    # Every input batch stays referenced here until the function returns;
    # presumably so the buffers outlive the async executions - TODO confirm.
    batchArrays = []
    for streamId, netconf_args in enumerate(args):
        batch = np.empty(netconf_args['in_shape'], dtype=np.float32, order='C')
        batchArrays.append(batch)
        img_paths = xdnn_io.getFilePaths(netconf_args['images'])
        for j, p in enumerate(img_paths[:netconf_args['batch_sz']]):
            batch[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                p, netconf_args['img_raw_scale'], netconf_args['img_mean'],
                netconf_args['img_input_scale'], netconf_args['in_shape'][2],
                netconf_args['in_shape'][3])

        confName = str(netconf_args['name'])
        firstInputName = next(iter(fpgaRT[confName].getInputs().keys()))
        firstOutputName = next(iter(fpgaRT[confName].getOutputs().keys()))
        fpgaRT[confName].exec_async({firstInputName: batch},
                                    {firstOutputName: fpgaOutputs[confName]},
                                    streamId)

    # Stream ids were assigned in configuration order, which is also the
    # order of confNames.
    for streamId, confName in enumerate(confNames):
        fpgaRT[confName].get_result(streamId)

    for netconf_args in args:
        confName = str(netconf_args['name'])
        fcOut = np.empty((netconf_args['batch_sz'], netconf_args['outsz']),
                         dtype=np.float32,
                         order='C')
        xdnn.computeFC(fcWeights[confName], fcBiases[confName],
                       fpgaOutputs[confName], fcOut)

        softmaxOut = xdnn.computeSoftmax(fcOut)
        xdnn_io.printClassification(softmaxOut, netconf_args['images'], labels)

    xdnn.closeHandle()
예제 #5
0
    def __exit__(self, *a):
        """Leave the context: stop, then release the handle if one is set.

        Calls ``self.stop()`` and afterwards ``xdnn.closeHandle()``, the
        latter only when ``self.xdnn_handle`` is truthy.  The exception details
        supplied by the ``with`` statement are ignored and ``None`` is
        returned.
        """
        self.stop()

        # Nothing to release when no handle is recorded.
        if not self.xdnn_handle:
            return
        xdnn.closeHandle()
예제 #6
0
# Set the labels file (flowers102 synset_words.txt) in the config, then print
# the classification for softmaxOut using that config.
config["labels"] = "../models/caffe/flowers102/data/synset_words.txt"
pyxfdnn_io.printClassification(softmaxOut, config)

# Show the first input image for reference.  The image is converted from BGR
# to RGB before being handed to matplotlib.
img = cv2.imread(config["images"][0])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
# NOTE(review): the title receives the whole config["images"] value, while
# only its first entry is displayed above - confirm this is intended.
plt.title(config["images"])
plt.show()

# ### 13. Close the handle

# In[15]:

pyxfdnn.closeHandle()

# ### 14. Your Turn!
# Great work! Now it is your turn!
#
# We have another trained model which leverages the Inception v1 architecture.
# This one is trained on the flowers dataset which has 102 classes.
#
# The final, fully connected layer has only 102 outputs for 102 output categories.
#
# This means that the graph and weights are different.
#
# Update this notebook to classify pretty flowers instead!
#
# Start by clicking **Kernel** from the menu, and then select **Reset & Clear Output**.
#
예제 #7
0
def fpga_process(fpgaRT, args, num_img, compJson, shared_trans_arrs,
                 shared_output_arrs):
    """Feed batches from the shared input buffers to the FPGA.

    Creates the FPGA runtime when ``fpgaRT`` is ``None`` (exiting with status
    1 when ``xdnn.createHandle`` reports a non-zero code), starts a
    ``fpga_wait`` thread, and then repeatedly:

    * opens a write slot in ``shared_output_arrs``,
    * pulls up to ``args['batch_sz']`` read slots from ``shared_trans_arrs``,
    * submits the batch with ``fpgaRT.exec_async`` using the write slot id,
    * hands ``(write_slot, read_slot_list, img_num)`` to the wait thread.

    The loop runs until ``num_img`` images were taken, or forever when
    ``args['perpetual']`` is truthy.  Afterwards a ``(None, None, None)``
    sentinel is queued, the thread is joined, the throughput is printed and
    the handle is closed.

    Args:
        fpgaRT: an existing runtime object, or ``None`` to create one here.
        args: option mapping; reads ``xclbin``, ``deviceID``, ``batch_sz`` and
            ``perpetual``.
        num_img: number of images to process when not running perpetually.
        compJson: object whose ``getInputs()`` / ``getOutputs()`` mappings
            supply the input and output names.
        shared_trans_arrs: shared buffer pool read via ``openReadId`` /
            ``accessNumpyBuffer``.
        shared_output_arrs: shared buffer pool written via ``openWriteId`` /
            ``accessNumpyBuffer``.
    """
    if fpgaRT is None:
        ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                         [args["deviceID"]])
        if ret != 0:
            sys.exit(1)
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
    else:
        print("fpga process handle was ready:")
    qWait = mp.Queue(maxsize=100)
    batch_sz = args['batch_sz']

    numProcessed = 0
    t = threading.Thread(target=fpga_wait,
                         args=(fpgaRT, qWait, shared_output_arrs,
                               shared_trans_arrs))
    t.start()

    # Materialise the names as lists so they can be indexed and counted on
    # both Python 2 and Python 3.
    InputName_list = [str(name) for name in compJson.getInputs().keys()]
    OutputName_list = [str(name) for name in compJson.getOutputs().keys()]

    startTime = time.time()
    while numProcessed < num_img or args['perpetual']:

        write_slot = shared_output_arrs.openWriteId()
        write_slot_arrs = shared_output_arrs.accessNumpyBuffer(write_slot)

        # Output name -> buffer of the write slot at the same position.
        out_dict = {
            name: write_slot_arrs[out_idx]
            for out_idx, name in enumerate(OutputName_list)
        }

        read_slot_arrs_list = []
        read_slot_list = []
        for img_num in range(batch_sz):
            read_slot = shared_trans_arrs.openReadId()

            if read_slot is None:
                break
            read_slot_arrs = shared_trans_arrs.accessNumpyBuffer(read_slot)
            read_slot_arrs_list.append(read_slot_arrs)
            read_slot_list.append(read_slot)

            # Carry the last array of the read slot over to this image's row
            # in the last array of the write slot (the image id, going by the
            # padding comment below).
            write_slot_arrs[-1][img_num][:] = read_slot_arrs[-1][:]

            numProcessed += 1
            if not args['perpetual'] and numProcessed == num_img:
                break

        images_added = len(read_slot_arrs_list)

        # When fewer images are available than the batch size, fill the rest
        # of the out buffer image-id slots with -1.
        for img_num in range(images_added, batch_sz):
            write_slot_arrs[-1][img_num][:] = -1

        # Input name -> list with that input's array from every read slot.
        in_dict = {
            name: [slot_arrs[in_idx] for slot_arrs in read_slot_arrs_list]
            for in_idx, name in enumerate(InputName_list)
        }

        fpgaRT.exec_async(in_dict, out_dict, write_slot)
        # NOTE(review): img_num is whatever value the loops above left behind
        # (and is undefined when batch_sz is 0) - confirm what fpga_wait
        # expects here.  The read slots are not closed in this function;
        # presumably fpga_wait does that - TODO confirm.
        qWait.put((write_slot, read_slot_list, img_num))

    qWait.put((None, None, None))
    t.join()
    elapsedTime = (time.time() - startTime)
    # A single formatted string prints the same text on Python 2 and 3.
    print("FPGA_process: %s img/s" % (float(numProcessed) / elapsedTime))

    xdnn.closeHandle()