示例#1
0
def prepareImages(args, PE=-1):
    """Load the images listed in ``args`` and prepare them for the FPGA.

    Reads the ``images``, ``transform``, ``img_mean``, ``in_shape``,
    ``quantizecfg``, ``scaleB`` and ``firstfpgalayer`` entries of ``args``.
    ``PE`` is forwarded unchanged to ``xdnn.prepareInputsForFpga``.

    Returns a ``(fpgaInputs, shapes, batch_sz)`` tuple, where ``batch_sz``
    is the number of entries in ``args['images']``.
    """
    image_list = args['images']
    num_images = len(image_list)

    loaded, shapes = loadImages(
        image_list, args['transform'], args['img_mean'], args['in_shape'])

    prepared = xdnn.prepareInputsForFpga(
        loaded, args['quantizecfg'], args['scaleB'], PE,
        args['firstfpgalayer'])
    return prepared, shapes, num_images
示例#2
0
def prepareImages(args, PE=-1):
    """Load the images listed in ``args`` and prepare them for the FPGA.

    Reads the ``images``, ``transform``, ``img_raw_scale``, ``img_mean``,
    ``img_input_scale``, ``in_shape``, ``quantizecfg``, ``scaleB`` and
    ``firstfpgalayer`` entries of ``args``. ``PE`` is forwarded unchanged
    to ``xdnn.prepareInputsForFpga``.

    Returns a ``(fpgaInputs, shapes, batch_sz)`` tuple, where ``batch_sz``
    is the number of entries in ``args['images']``.
    """
    image_list = args['images']
    num_images = len(image_list)

    loaded, shapes = loadImages(
        image_list, args['transform'], args['img_raw_scale'],
        args['img_mean'], args['img_input_scale'], args['in_shape'])

    prepared = xdnn.prepareInputsForFpga(
        loaded, args['quantizecfg'], args['scaleB'], PE,
        args['firstfpgalayer'])
    return prepared, shapes, num_images
示例#3
0
def prepareRawInputs(args, PE=-1):
    """Load raw input data from ``args['datadir']`` and prepare it for the FPGA.

    The batch size comes from ``args['batch_sz']``: a value equal to 1 is
    used as the literal ``1``; anything else is converted with ``int()``.
    ``PE`` is forwarded unchanged to ``xdnn.prepareInputsForFpga``.

    Returns a ``(fpgaInputs, shapes, batch_sz)`` tuple.
    """
    requested = args['batch_sz']
    batch_sz = int(requested) if requested != 1 else 1

    raw, shapes = loadRawDataInput(args['datadir'], batch_sz, args['in_shape'])

    prepared = xdnn.prepareInputsForFpga(
        raw, args['quantizecfg'], args['scaleB'], PE, args['firstfpgalayer'])
    return prepared, shapes, batch_sz
示例#4
0
def prepareRawInputs(args, PE=-1):
    """Load raw input data from ``args['datadir']`` and prepare it for the FPGA.

    Prints a short notice first. The batch size comes from
    ``args['batch_sz']``: a value equal to 1 is used as the literal ``1``;
    anything else is converted with ``int()``. ``PE`` is forwarded
    unchanged to ``xdnn.prepareInputsForFpga``.

    Returns a ``(fpgaInputs, shapes, batch_sz)`` tuple.
    """
    print('import Raw Input')

    requested = args['batch_sz']
    batch_sz = int(requested) if requested != 1 else 1

    raw, shapes = loadRawDataInput(args['datadir'], batch_sz, args['in_shape'])

    prepared = xdnn.prepareInputsForFpga(
        raw, args['quantizecfg'], args['scaleB'], PE, args['firstfpgalayer'])
    return prepared, shapes, batch_sz
示例#5
0
def prepareFpgaInputs(inputs):
    """Prepare ``inputs`` for the FPGA using the module-level configuration.

    Passes ``g_fpgaCfgFile``, ``g_scaleB`` and ``g_firstFpgaLayerName`` to
    ``xdnn.prepareInputsForFpga`` with a fixed PE argument of ``-1`` and
    returns its result.
    """
    return xdnn.prepareInputsForFpga(
        inputs, g_fpgaCfgFile, g_scaleB, -1, g_firstFpgaLayerName)
示例#6
0
def img_classify(msg):
    """Classify the image files named in ``msg`` on the FPGA.

    ``msg.rowset`` is read as a rowset whose first column holds the image
    file names in ``sdata``. Each file is loaded, quantized, prepared for
    the FPGA, executed, and then passed through the FC layer and softmax.

    Returns ``None`` when the request is empty or when no classification
    was produced. Otherwise returns an ``xdrive_pb2.XMsg`` whose rowset has
    four columns: (filename, ordinal, score, class).
    """
    global g_inputs
    global g_inputbuf
    global g_fpgaOutput
    global g_weightsBlob
    global g_fcWeight
    global g_fcBias

    # message is a rowset, one col, a list of file names.
    rs = msg.rowset
    if len(rs.columns) == 0 or rs.columns[0].nrow == 0:
        print("Img classify request size is 0.\n")
        return None
    print("Img classify request size is {0}.\n".format(rs.columns[0].nrow))

    ret = None

    # Lock the fpga device.   config is protected by this lock as well.
    fpga_lock.acquire()
    try:
        # NOTE(review): ``ret`` is overwritten on every iteration, so only
        # the last file's classification is returned -- confirm whether
        # results for all files were meant to be accumulated.
        for i in range(rs.columns[0].nrow):
            fname = rs.columns[0].sdata[i]
            print("Running classification for images: {0}\n".format(fname))
            print("Prepare inputs ...\n")

            # g_batchSize = 1, for now.
            # Formatted into one string so the output is the same whether
            # or not ``print`` is a function in this interpreter.
            print("g_inputs {0}".format(g_inputs))
            g_inputs[0] = xdnn_io.loadImageBlobFromFile(str(fname), g_mean,
                                                        g_img_h, g_img_w)

            print("Quantize inputs ...\n")
            quantizeInputs = xdnn.quantizeInputs(g_firstFpgaLayerName,
                                                 g_inputs, None, None,
                                                 g_fpgaCfgFile, g_scaleB)

            print("Prepare inputs for fpga inputs ...\n")
            fpgaInputs = xdnn.prepareInputsForFpga(quantizeInputs,
                                                   g_fpgaCfgFile, g_scaleB,
                                                   -1, g_firstFpgaLayerName)

            print("Run FPGA commands ...\n")
            xdnn.execute(g_netFile, g_weightsBlob, fpgaInputs, g_fpgaOutput,
                         g_batchSize, g_fpgaCfgFile, g_scaleB, g_PE)

            print("Compute FC ...\n")
            fcOutput = xdnn.computeFC(g_fcWeight, g_fcBias, g_fpgaOutput,
                                      g_batchSize, g_outputSize,
                                      g_fpgaOutputSize, g_useBlas)

            print("Softmax ...\n")
            softmaxOut = xdnn.computeSoftmax(fcOutput, g_batchSize)
            ret = get_classification(softmaxOut, fname)
    finally:
        # Always release the device lock, even if a step above raised;
        # otherwise every later request would block forever.
        fpga_lock.release()

    # Now construct return msg
    if ret is None:
        print("Return None: ???\n")
        return None

    retmsg = xdrive_pb2.XMsg()
    out_rs = retmsg.rowset
    # return 4 columns, (filename, ordinal, score, class)
    col1 = out_rs.columns.add()
    col2 = out_rs.columns.add()
    col3 = out_rs.columns.add()
    col4 = out_rs.columns.add()

    nrow = len(ret)
    for col in (col1, col2, col3, col4):
        col.nrow = nrow

    for (name, ordinal, score, label) in ret:
        col1.nullmap.append(False)
        col1.sdata.append(name)
        col2.nullmap.append(False)
        col2.i32data.append(ordinal)
        col3.nullmap.append(False)
        col3.f64data.append(score)
        col4.nullmap.append(False)
        col4.sdata.append(label)

    return retmsg
示例#7
0
def XDLFprepareRawInputs(args, RawInputs, PE=-1):
    """Prepare already-loaded ``RawInputs`` for the FPGA.

    Forwards ``RawInputs`` together with the ``quantizecfg``, ``scaleB``
    and ``firstfpgalayer`` entries of ``args`` and the given ``PE`` to
    ``xdnn.prepareInputsForFpga`` and returns its result.
    """
    quantize_cfg = args['quantizecfg']
    scale_b = args['scaleB']
    first_layer = args['firstfpgalayer']
    return xdnn.prepareInputsForFpga(RawInputs, quantize_cfg, scale_b, PE,
                                     first_layer)
示例#8
0
def benchmark():
    """Run a throughput benchmark on the FPGA with random input data.

    Configuration is read from the first ``jsoncfg`` entry returned by
    ``xdnn_io.processCommandLine()``. Two random input batches (stream ids
    0 and 1) are quantized and prepared, the network is initialised for
    both streams, and inference is run either in "Non-Blocking" mode
    (``exec_async``/``get_result`` alternating between the two streams) or
    in "Blocking" mode (``execute`` on stream 0's buffers). A performance
    summary is printed afterwards.

    Exits the process with status 1 if ``xdnn.createHandle`` reports
    failure. The FPGA handle is closed even if the run raises.
    """
    # Function-scope imports: ``reduce`` is not a builtin on Python 3.
    from functools import reduce
    from operator import mul

    mode = "Non-Blocking"
    # mode = "Blocking"

    # Extract Arguments from json
    args = xdnn_io.processCommandLine()["jsoncfg"][0]

    if "platform" in args:
        args["xclbin"] = "../../overlaybins/" + str(
            args["platform"]) + "/" + args["xclbin"]

    # Establish Communication w/ FPGA
    if xdnn.createHandle(args['xclbin'], libFile=args['xlnxlib']):
        sys.exit(1)

    try:
        # Transfer weights to device memory
        if "usexdnnv3" in args and args["usexdnnv3"] == "1":
            weightsBlob = xdnn_io.loadWeightsBiasQuantv3(args)
        else:
            weightsBlob = xdnn_io.loadWeightsBiasQuant(args)

        stream_ids = (0, 1)

        # Create random input data, one batch per stream
        fpgaInputs = []
        for sid in stream_ids:
            fpgaInputs.append(
                np.float32(
                    np.random.standard_normal(
                        (args["batchsz"],
                         reduce(mul, args["in_shape"], 1)))))
            fpgaInputs[sid] = xdnn.quantizeInputs(args["firstfpgalayer"],
                                                  args["quantizecfg"],
                                                  args["scaleB"],
                                                  fpgaInputs[sid])
            fpgaInputs[sid] = xdnn.prepareInputsForFpga(
                fpgaInputs[sid], args["quantizecfg"], args["scaleB"], -1,
                args["firstfpgalayer"], sid)

        # Create buffers in host memory for result
        fpgaOutputs = [
            xdnn_io.prepareOutput(args['fpgaoutsz'], args["batchsz"])
            for _ in stream_ids
        ]

        # Load network schedule to accelerator
        for sid in stream_ids:
            xdnn.initScript(args['netcfg'], weightsBlob, args["batchsz"],
                            args['quantizecfg'], args['scaleB'], args['PE'],
                            sid)

        def launch(sid):
            # Queue one asynchronous batch on the given stream.
            xdnn.exec_async(args['netcfg'], weightsBlob, fpgaInputs[sid],
                            fpgaOutputs[sid], args["batchsz"],
                            args['quantizecfg'], args['scaleB'], args['PE'],
                            sid)

        # Run forward propagation N times
        print("Running inference...\n")
        cumulative_time = -1 * timeit.default_timer()

        if mode == "Non-Blocking":
            # NOTE(review): this path issues 2 * max(iterations // 2, 1)
            # batches, which differs from ``iterations`` when that value is
            # odd or below 2, while the summary below reports
            # ``iterations`` -- confirm whether that is intended.
            for sid in stream_ids:
                launch(sid)

            # Floor division keeps ``range`` working on Python 3, where
            # ``/`` would produce a float.
            for _ in range(args["iterations"] // 2 - 1):
                for sid in stream_ids:
                    xdnn.get_result(-1, sid)  # wait for this stream
                    launch(sid)  # and immediately refill it

            for sid in stream_ids:
                xdnn.get_result(-1, sid)

        else:
            for _ in range(args["iterations"]):
                xdnn.execute(args['netcfg'], weightsBlob, fpgaInputs[0],
                             fpgaOutputs[0], args["batchsz"],
                             args['quantizecfg'], args['scaleB'], args['PE'])

        cumulative_time += timeit.default_timer()

        # Summarize
        print("===========================================")
        print("Performance Summary\n")
        print("  Network: %s" % (args["name"]))
        print("  Precision: %d" % (args["precision"]))
        print("  Images: %d" % (args["iterations"] * args["batchsz"]))
        print("  Batch Size: %d" % (args["batchsz"]))
        print("  Total Batches: %d" % (args["iterations"]))
        print("  Total Time: %.2f ms" % (1000 * cumulative_time))
        print("  SIL: %.2f ms" %
              (1000 * cumulative_time /
               args["iterations"]))  # Time per batch # Single Image Latency
        print("  FPS: %.2f" %
              (args["iterations"] * args["batchsz"] / cumulative_time))
        print("  GOPS: %.2f" %
              (args["ops"] * args["iterations"] * args["batchsz"] /
               cumulative_time / 1000000000))
        print("===========================================\n")
    finally:
        # Release FPGA
        xdnn.closeHandle()