Exemplo n.º 1
0
def main(argv):
    """Run one batch through every configured network asynchronously and print the results.

    The command line is parsed with ``xdnn_io.processCommandLine``; the
    ``jsoncfg`` entry of the parsed arguments is iterated as a sequence of
    per-network configuration dicts.  For each network a runtime object is
    created, one batch of images is loaded and submitted with ``exec_async``
    on its own stream id, and once all streams have been collected the
    fully-connected layer and softmax are computed and printed.

    Args:
        argv: argument list forwarded to ``xdnn_io.processCommandLine``.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    args = xdnn_io.processCommandLine(argv)
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    # From here on the handle is open: release it even if a later step raises.
    try:
        labels = xdnn_io.get_labels(args['labels'])

        # Per-network state, keyed by the configuration name.
        fpgaRT = {}
        fpgaOutputs = {}
        fcWeights = {}
        fcBiases = {}
        confNames = []

        # Only the per-network configurations are needed from now on.
        netconfs = args['jsoncfg']
        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            confNames.append(confName)
            fpgaRT[confName] = xdnn.XDNNFPGAOp(handles, netconf_args)

            # next(iter(...)) picks the first entry on both Python 2 and 3.
            firstInputDesc = next(
                iter(fpgaRT[confName].getInputDescriptors().values()))
            # Replace the descriptor's leading dimension with the batch size.
            netconf_args['in_shape'] = (
                (netconf_args['batch_sz'],) + tuple(firstInputDesc[1:]))

            fcWeights[confName], fcBiases[confName] = (
                xdnn_io.loadFCWeightsBias(netconf_args))
            fpgaOutputs[confName] = np.empty(
                (netconf_args['batch_sz'], int(netconf_args['fpgaoutsz'])),
                dtype=np.float32, order='C')

        # Presumably kept in a list so every input buffer stays referenced
        # while its asynchronous execution is in flight.
        batchArrays = []
        for streamId, netconf_args in enumerate(netconfs):
            batch = np.empty(netconf_args['in_shape'], dtype=np.float32,
                             order='C')
            batchArrays.append(batch)

            img_paths = xdnn_io.getFilePaths(netconf_args['images'])
            for j, p in enumerate(img_paths[:netconf_args['batch_sz']]):
                batch[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p,
                    netconf_args['img_raw_scale'],
                    netconf_args['img_mean'],
                    netconf_args['img_input_scale'],
                    netconf_args['in_shape'][2],
                    netconf_args['in_shape'][3])

            confName = str(netconf_args['name'])
            firstInputName = next(iter(fpgaRT[confName].getInputs()))
            firstOutputName = next(iter(fpgaRT[confName].getOutputs()))
            fpgaRT[confName].exec_async(
                {firstInputName: batch},
                {firstOutputName: fpgaOutputs[confName]},
                streamId)

        # Collect every stream in the same order the work was submitted.
        for streamId, confName in enumerate(confNames):
            fpgaRT[confName].get_result(streamId)

        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            fcOut = np.empty(
                (netconf_args['batch_sz'], netconf_args['outsz']),
                dtype=np.float32, order='C')
            xdnn.computeFC(fcWeights[confName], fcBiases[confName],
                           fpgaOutputs[confName], fcOut)

            softmaxOut = xdnn.computeSoftmax(fcOut)
            xdnn_io.printClassification(softmaxOut, netconf_args['images'],
                                        labels)
    finally:
        xdnn.closeHandle()
Exemplo n.º 2
0
def init_fpga():
    """Initialise the FPGA runtime and store everything needed later in globals.

    The arguments come from ``build_xdnn_args()`` instead of the real command
    line.  The parsed arguments are stored in the module-level ``g_args``;
    the runtime object, FC weights/biases, I/O buffers and labels are stored
    in the module-level ``g_ctxt`` mapping under the keys ``fpgaRT``,
    ``fcWeight``, ``fcBias``, ``fpgaOutput``, ``fcOutput``, ``labels`` and,
    depending on ``is_deploymode()``, either ``fpgaInput``/``inShape`` or
    ``batch_array``.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    global g_args, g_ctxt

    print(" --- INIT FPGA --- \n")
    xdnnArgs = build_xdnn_args()
    print(xdnnArgs)
    g_args = xdnn_io.processCommandLine(xdnnArgs)
    print(" --- After parsing --- \n")
    print(g_args)

    print(" --- Create handle --- \n")
    ret, handles = xdnn.createHandle(g_args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        print(" --- !!! FAILED: Cannot create handle. --- \n")
        sys.exit(1)

    print(" --- Create fpgaRT --- \n")
    fpgaRT = xdnn.XDNNFPGAOp(handles, g_args)
    g_ctxt["fpgaRT"] = fpgaRT

    print(" --- Weight and Bias --- \n")
    fcWeight, fcBias = xdnn_io.loadFCWeightsBias(g_args)
    g_ctxt["fcWeight"] = fcWeight
    g_ctxt["fcBias"] = fcBias

    print(" --- Init input input/output area --- \n")
    batch_sz = g_args['batch_sz']
    if is_deploymode():
        # In deploy mode the runtime's own input/output objects are used.
        g_ctxt['fpgaOutput'] = fpgaRT.getOutputs()
        g_ctxt['fpgaInput'] = fpgaRT.getInputs()
        # next(iter(...)) picks the first descriptor on both Python 2 and 3;
        # its leading dimension is replaced with the batch size.
        firstInputDesc = next(iter(fpgaRT.getInputDescriptors().values()))
        g_ctxt['inShape'] = (batch_sz,) + tuple(firstInputDesc[1:])
    else:
        # Otherwise host-side buffers are allocated here.
        g_ctxt['fpgaOutput'] = np.empty(
            (batch_sz, g_args['fpgaoutsz']), dtype=np.float32, order='C')
        g_ctxt['batch_array'] = np.empty(
            (batch_sz,) + g_args['in_shape'], dtype=np.float32, order='C')

    g_ctxt['fcOutput'] = np.empty(
        (batch_sz, g_args['outsz']), dtype=np.float32, order='C')

    print(" --- Get lables --- \n")
    g_ctxt['labels'] = xdnn_io.get_labels(g_args['labels'])

    print(" --- FPGA INITIALIZED! ---\n")
Exemplo n.º 3
0
def main():
    """Classify every image found under ``args['images']`` in batches on the FPGA.

    For each batch the images are loaded into a reusable host buffer, run
    through the FPGA runtime, then through the fully-connected layer and
    softmax, and the classification is printed.  When ``args['golden']`` is
    set, Top-1 and Top-5 hits are counted against the golden map and an
    accuracy summary is printed at the end.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    args = xdnn_io.processCommandLine()

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    batch_sz = args['batch_sz']
    top1Count = 0
    top5Count = 0
    goldenMap = None

    # The handle is open from here on: release it even if a later step raises.
    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fcWeight, fcBias = xdnn_io.loadFCWeightsBias(args)
        img_paths = xdnn_io.getFilePaths(args['images'])
        fpgaOutput = np.empty((batch_sz, args['fpgaoutsz']),
                              dtype=np.float32, order='C')
        fcOutput = np.empty((batch_sz, args['outsz']),
                            dtype=np.float32, order='C')
        batch_array = np.empty((batch_sz,) + args['in_shape'],
                               dtype=np.float32, order='C')
        labels = xdnn_io.get_labels(args['labels'])
        if args['golden']:
            goldenMap = xdnn_io.getGoldenMap(args['golden'])

        for i in range(0, len(img_paths), batch_sz):
            # Paths of the images that make up this batch.
            pl = img_paths[i:i + batch_sz]
            # NOTE(review): when the last batch is short, the remaining rows
            # of batch_array still hold the previous batch's data.
            for j, p in enumerate(pl):
                # in_shape has no batch axis here (batch_array prepends it),
                # so indices 2 and 1 address in_shape itself.
                batch_array[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p, args['img_raw_scale'], args['img_mean'],
                    args['img_input_scale'], args['in_shape'][2],
                    args['in_shape'][1])

            fpgaRT.execute(batch_array, fpgaOutput)
            xdnn.computeFC(fcWeight, fcBias, fpgaOutput, batch_sz,
                           args['outsz'], args['fpgaoutsz'], fcOutput)
            softmaxOut = xdnn.computeSoftmax(fcOutput)
            xdnn_io.printClassification(softmaxOut, pl, labels)

            if args['golden']:
                for j, p in enumerate(pl):
                    top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 1)
                    top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 5)
    finally:
        xdnn.closeHandle()

    # Skip the summary when there are no images, to avoid dividing by zero.
    numImages = len(img_paths)
    if args['golden'] and numImages:
        # Format inside the call: applying % to the result of print() fails
        # whenever print is a function.
        print("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n" % (
            numImages,
            float(top1Count) / float(numImages) * 100.,
            float(top5Count) / float(numImages) * 100.))
def fpga_process_async(qFrom, qTo, args, num_img, sharedInputArrs, prepProcQ,
                       streamQ, fpgaOutputs):
    """Feed batches of pre-processed images to the FPGA asynchronously.

    Stream ids are taken from ``streamQ`` and ``(shared-memory index, image
    index)`` pairs from ``qFrom``.  Up to ``args['batch_sz']`` images are
    gathered per batch, submitted with ``exec_async`` on the chosen stream,
    and the bookkeeping tuple ``(stream id, image indices, shared-memory
    indices)`` is handed to a helper thread running ``xdnn_wait``.  The loop
    runs until ``num_img`` images were consumed, or forever while
    ``args['perpetual']`` is truthy.  A ``(None, None, None)`` tuple is
    queued last, the helper thread is joined and the handle is closed.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    status, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                        [args["deviceID"]])
    if status != 0:
        sys.exit(1)
    runtime = xdnn.XDNNFPGAOp(handles, args)

    pendingQ = mp.Queue(maxsize=100)

    streamCount = args['numstream']
    batchSize = args['batch_sz']
    # One list of input pointers per stream.
    streamInputs = [[] for _ in range(streamCount)]

    consumed = 0
    waiter = threading.Thread(
        target=xdnn_wait, args=(runtime, pendingQ, qTo, prepProcQ))
    waiter.start()

    floatPtrType = ctypes.POINTER(ctypes.c_float)
    while consumed < num_img or args['perpetual']:
        # Slots that stay unfilled in a short batch keep the value -1.
        imageIds = np.full((batchSize, ), -1, dtype=np.int32)
        stream = streamQ.get()
        pointers = []
        streamInputs[stream] = pointers
        memSlots = []
        for row in range(batchSize):
            memSlot, imageId = qFrom.get()
            consumed += 1
            imageIds[row] = imageId
            flatView = np.frombuffer(sharedInputArrs[memSlot].get_obj(),
                                     dtype=np.float32)
            batchedView = flatView[np.newaxis, ...]
            pointers.append(batchedView.ctypes.data_as(floatPtrType))
            memSlots.append(memSlot)
            if consumed == num_img:
                break

        outputView = np.frombuffer(fpgaOutputs[stream].get_obj(),
                                   dtype=np.float32)
        runtime.exec_async(pointers, outputView, stream)

        pendingQ.put((stream, imageIds, memSlots))

    # All-None tuple queued after the last batch.
    pendingQ.put((None, None, None))
    waiter.join()
    xdnn.closeHandle()
Exemplo n.º 5
0
def fpga_process_async(qFrom, qTo, args, num_img, sharedInputArrs, prepProcQ,
                       streamQ, fpgaOutputs, compJson):
    """Feed batches of pre-processed images to the FPGA asynchronously.

    Stream ids are taken from ``streamQ`` and ``(shared-memory index, image
    index)`` pairs from ``qFrom``.  Up to ``args['batch_sz']`` input views are
    gathered per batch and submitted with ``exec_async`` as name-keyed
    dictionaries, using the first input and output names reported by
    ``compJson``.  The bookkeeping tuple ``(stream id, image indices,
    shared-memory indices)`` is handed to a helper thread running
    ``xdnn_wait``.  The loop runs until ``num_img`` images were consumed, or
    forever while ``args['perpetual']`` is truthy.  A ``(None, None, None)``
    tuple is queued last, the helper thread is joined and the handle is
    closed.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                     [args["deviceID"]])
    if ret != 0:
        sys.exit(1)
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    qWait = mp.Queue(maxsize=100)

    numStreams = args['numstream']
    bsz = args['batch_sz']
    input_ptrs = [[] for _ in range(numStreams)]

    # Resolve names and shapes before starting the helper thread, so a
    # failure here cannot leave that thread blocked on its queue.
    # next(iter(...)) picks the first entry on both Python 2 and 3.
    firstInputName = next(iter(compJson.getInputs()))
    outputs = compJson.getOutputs()
    firstOutputName = next(iter(outputs))
    firstOutputShape = next(iter(outputs.values()))
    # Output buffer shape: the leading dimension is replaced by the batch size.
    outShape = (bsz,) + tuple(firstOutputShape[1:])

    numProcessed = 0
    t = threading.Thread(target=xdnn_wait,
                         args=(fpgaRT, qWait, qTo, prepProcQ))
    t.start()

    while numProcessed < num_img or args['perpetual']:
        # Slots that stay unfilled in a short batch keep the value -1.
        img_list = np.full((bsz,), -1, dtype=np.int32)
        sId = streamQ.get()
        input_ptrs[sId] = []
        shMemIdxArr = []
        for j in range(bsz):
            sMemIdx, img_idx = qFrom.get()
            numProcessed += 1
            img_list[j] = img_idx
            nparr_view = np.frombuffer(sharedInputArrs[sMemIdx].get_obj(),
                                       dtype=np.float32)
            input_ptrs[sId].append(nparr_view)
            shMemIdxArr.append(sMemIdx)
            if numProcessed == num_img:
                break

        npout_view = np.frombuffer(fpgaOutputs[sId].get_obj(),
                                   dtype=np.float32).reshape(outShape)
        fpgaRT.exec_async({firstInputName: input_ptrs[sId]},
                          {firstOutputName: npout_view}, sId)

        qWait.put((sId, img_list, shMemIdxArr))

    # All-None tuple queued after the last batch.
    qWait.put((None, None, None))
    t.join()
    xdnn.closeHandle()
Exemplo n.º 6
0
    def setup(self, bottom, top):
        """Parse the layer parameters, open the FPGA and create the runtime objects.

        ``self.param_str`` is evaluated into ``self.param_dict`` and turned
        into the argument dictionary ``self._args``.  After the handle has
        been created the runtime object, the input/output name lists and the
        compiler-JSON parser are stored on the instance.  ``bottom`` and
        ``top`` are not used here.

        Raises:
            Exception: if the FPGA handle cannot be created.
        """
        # NOTE(review): eval() executes whatever expression param_str holds;
        # only safe while the prototxt is trusted.
        self.param_dict = eval(self.param_str)
        self._args = xdnn_io.make_dict_args(self.param_dict)
        cfg = self._args

        # batch_sz is reused as the number of PEs (original author's workaround).
        self._numPE = cfg["batch_sz"]

        # Open the FPGA; a non-zero status means the handle was not created.
        status, handles = xdnn.createHandle(cfg["xclbin"], "kernelSxdnn_0")
        if status != 0:
            raise Exception("Failed to open FPGA handle.")

        # These overrides must be in place before the runtime object is built.
        cfg["scaleB"] = 1
        cfg["PE"] = -1

        self.fpgaRT = xdnn.XDNNFPGAOp(handles, cfg)
        self._indictnames = cfg["input_names"]
        self._outdictnames = cfg["output_names"]
        self._parser = xdnn.CompilerJsonParser(cfg["netcfg"])
Exemplo n.º 7
0
    def __init__(self, maxNumStreams):
        """Open the FPGA, load weights and labels, and set up per-stream slots.

        Args:
            maxNumStreams: number of stream ids to create; each id is marked
                available and gets an empty (``None``) input and output slot.

        Exits the process with status 1 when the FPGA handle cannot be created.
        """
        self._maxNumStreams = maxNumStreams
        self._streamsAvailable = []
        self._streamInputs = []
        self._streamOutputs = []

        config = xdnn_io.processCommandLine()
        self._config = config
        status, handles = xdnn.createHandle(config['xclbin'])
        if status != 0:
            sys.exit(1)

        self._fpgaRT = xdnn.XDNNFPGAOp(handles, config)
        self._fcWeight, self._fcBias = xdnn_io.loadFCWeightsBias(config)
        self._labels = xdnn_io.get_labels(config['labels'])

        # Stream ids 0..maxNumStreams-1 start out available, with no buffers.
        streamIds = range(maxNumStreams)
        self._streamsAvailable.extend(streamIds)
        self._streamInputs.extend(None for _ in streamIds)
        self._streamOutputs.extend(None for _ in streamIds)
def executeOnFPGA(sProtoBufPath, Qmode, Inference_Data, handle, name,
                  num_models):
    """Run ``num_models`` copies of a model in parallel on the FPGA and log throughput.

    A random batch is generated once and submitted asynchronously to every
    model instance (instance ``i`` uses PE ``i`` and stream ``i``).  After all
    results are fetched, the fully-connected layer and softmax are computed on
    the host.  The measured duration and throughput are printed, appended to
    ``Inference_Data`` and added to ``multinet_results.csv``.

    Args:
        sProtoBufPath: forwarded to ``initializeFpgaModel``.
        Qmode: forwarded to ``initializeFpgaModel``; also used to label the
            experiment as ``"<Qmode>_bit_mode"``.
        Inference_Data: list that receives one result dict (mutated in place).
        handle: ignored; a fresh handle is always created here.
        name: unused.
        num_models: number of model instances to run in parallel.

    Raises:
        RuntimeError: if the FPGA handle cannot be created.
    """
    # Local import so this block does not depend on the file's import section.
    import os

    TOTAL_IMAGES = 128
    RESULTS_CSV = 'multinet_results.csv'

    # Create handle for FPGA; a non-zero status means it could not be opened.
    ret, handle = xdnn.createHandle(
        "../overlaybins/" + "aws" + "/overlay_1.xclbin", "kernelSxdnn_0")
    if ret != 0:
        raise RuntimeError("Failed to open FPGA handle.")

    # The handle is open from here on: release it even if a later step raises.
    try:
        # Per-model state, keyed by the model index as a string.
        fpgaRT = {}
        fpgaOutput = {}
        fcWeight = {}
        fcBias = {}

        batch_array = generateRandomBatch(TOTAL_IMAGES, None)

        for i in range(num_models):
            key = str(i)
            config = initializeFpgaModel(sProtoBufPath, Qmode)
            config["PE"] = i
            config["name"] = config["name"] + "_" + key
            # Load weights to FPGA
            config = TransferWeightsFPGA(len(batch_array), config, handle, i)
            fpgaRT[key] = xdnn.XDNNFPGAOp(handle, config)
            fcWeight[key], fcBias[key] = xdnn_io.loadFCWeightsBias(config)
            fpgaOutput[key], fcOutput, config = AllocateMemoryToHost(config)

        start0 = time.time()
        # Schedule FPGA execution asynchronously, one stream per model.
        for i in range(num_models):
            fpgaRT[str(i)].exec_async(batch_array, fpgaOutput[str(i)], i)

        # Fetch results of all parallel executions.
        for i in range(num_models):
            key = str(i)
            fpgaRT[key].get_result(i)
            # config and fcOutput are the ones left over from the last model
            # set up above; every model is built from the same
            # sProtoBufPath/Qmode, so the sizes are presumably identical.
            xdnn.computeFC(fcWeight[key], fcBias[key], fpgaOutput[key],
                           config['batch_sz'], config['outsz'],
                           config['fpgaoutsz'], fcOutput)
            # Result is discarded; the call is kept as part of the timed work.
            xdnn.computeSoftmax(fcOutput)

        end = time.time()
        duration = end - start0
        throughput = num_models * len(batch_array) / duration
        print("throughput", throughput, "duration", duration)

        Inference_Data.append({
            "experiment": str(Qmode) + "_bit_mode",
            "duration_overall": duration,
            "imgsPerSecAll": throughput,
            "num_models_parallel": num_models,
        })
    finally:
        xdnn.closeHandle()

    newRows = pd.DataFrame(Inference_Data)
    if os.path.exists(RESULTS_CSV):
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        # NOTE(review): to_csv writes the index and read_csv reads it back as
        # a data column, so an "Unnamed: 0"-style column is added on each
        # round trip - confirm whether index=False is wanted.
        result = pd.concat([pd.read_csv(RESULTS_CSV), newRows])
    else:
        # First run: there is no earlier results file to extend.
        result = newRows
    result.to_csv(RESULTS_CSV)
Exemplo n.º 9
0
def networkForward(netcfg, layername):
    """Execute the network described by ``netcfg`` on the FPGA and return its run time.

    Command-line arguments are parsed with the default xdnn parser extended by
    a ``--layerindex`` option; ``netcfg``, ``batch_sz`` (1) and ``PE`` (0) are
    then overridden.  Every image found under ``args['images']`` is loaded and
    executed in turn; the value returned is the execution time reported by the
    runtime, averaged over ``iterations`` runs, for the last batch processed.

    Args:
        netcfg: path of the network configuration, stored in ``args['netcfg']``.
        layername: unused.

    Returns:
        The averaged ``get_exec_time()`` value of the last batch.

    Raises:
        ValueError: if no input images are found.

    Exits the process with status 1 when the FPGA handle cannot be created.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--layerindex', type=int, default=0,
                        help='Index value for layer in json', required=True)
    argvt = parser.parse_args()
    args = xdnn_io.make_dict_args(argvt)

    args['netcfg'] = netcfg
    # Hardcode these parameters, so we only have to look at performance of 1 PE
    args["batch_sz"] = 1
    args["PE"] = 0

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    # How many executions to run per batch and average across.
    iterations = 1

    # The handle is open from here on; the finally clause releases it on
    # both the normal return and any exception.
    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fpgaOutput = fpgaRT.getOutputs()
        fpgaInput = fpgaRT.getInputs()

        img_paths = xdnn_io.getFilePaths(args['images'])
        if len(img_paths) == 0:
            raise ValueError("no input images found; nothing to execute")

        batch_sz = args['batch_sz']
        # First input descriptor with its leading dimension replaced by the
        # batch size.
        firstDescriptor = tuple(fpgaRT.getInputDescriptors().values())[0]
        inShape = (batch_sz,) + tuple(firstDescriptor[1:])
        firstInput = list(fpgaInput.values())[0]

        avetime = None
        for i in range(0, len(img_paths), batch_sz):
            # Load each image of the batch into its own row of the input.
            for j, p in enumerate(img_paths[i:i + batch_sz]):
                firstInput[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p, args['img_raw_scale'], args['img_mean'],
                    args['img_input_scale'], inShape[2], inShape[3])

            total = 0.0
            for _ in range(iterations):
                fpgaRT.execute(fpgaInput, fpgaOutput)
                total += fpgaRT.get_exec_time()
            avetime = total / iterations

        return avetime
    finally:
        xdnn.closeHandle()