def main(args=None):
    """Benchmark classification throughput through a ``Dispatcher``.

    Builds ``g_nQueries`` batches of image paths (wrapping around the list
    of available images), submits all of them to the dispatcher in one
    call, and prints the elapsed time together with the QPS and FPS
    figures derived from it.

    Args:
        args: Accepted but not used; it is overwritten with the result of
            ``xdnn_io.processCommandLine()`` before it is ever read.
    """
    # NOTE(review): the dynamic loader's eager-binding switch is spelled
    # LD_BIND_NOW; confirm that "LS_BIND_NOW" is intentional.
    os.environ['LS_BIND_NOW'] = "1"

    args = xdnn_io.processCommandLine()
    images = xdnn_io.getFilePaths(args['images'])

    # Spawn the dispatcher and take the input shape it reports.
    dispatcher = Dispatcher(args['vitis_rundir'], g_nFPGA, g_nDispatchers,
                            args['batch_sz'])
    inshape = dispatcher.inshape

    # One work item per query: (query id, image paths, preprocessing params).
    # The modulo wraps the image index so every batch is full even when
    # there are fewer images than g_nQueries * batch size.
    per_batch = inshape[0]
    n_images = len(images)
    work = [
        (
            query,
            [images[(query * per_batch + k) % n_images]
             for k in range(per_batch)],
            (args['img_raw_scale'], args['img_mean'],
             args['img_input_scale']),
        )
        for query in range(g_nQueries)
    ]

    started = timeit.default_timer()
    dispatcher.run(work)
    # The dispatcher is dropped before the clock is stopped, so whatever
    # its teardown does is part of the measured time.
    del dispatcher
    elapsed = timeit.default_timer() - started

    qps = g_nQueries / elapsed
    fps = g_nQueries * inshape[0] / elapsed
    print("Queries: %d, Elapsed: %.2fs, QPS: %.2f, FPS: %.2f"
          % (g_nQueries, elapsed, qps, fps))
    sys.stdout.flush()
def main():
    """Classify the command-line images batch by batch and report results.

    Each batch is loaded into the first input blob, executed through a
    ``Runner``, then passed through ``XDNNCPUOp.computeFC`` and
    ``computeSoftmax``.  When a golden file is configured the top-1 and
    top-5 hit counts are accumulated and an accuracy summary is printed at
    the end; otherwise each batch's classification is printed directly.
    """
    args = xdnn_io.processCommandLine()

    runner = Runner(args['vitis_rundir'])
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()

    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # use Runner's suggested batch size
        batch_sz = inTensors[0].dims[0]

    golden = args['golden']
    if golden:
        goldenMap = xdnn_io.getGoldenMap(golden)
        top5Count = 0
        top1Count = 0

    def _alloc_blobs(tensors):
        # One float32 C-ordered buffer per tensor: the tensor's own
        # dimensions with the leading one replaced by batch_sz.
        return [
            np.empty(((batch_sz,) + tuple(t.dims[d]
                                          for d in range(1, t.ndims))),
                     dtype=np.float32, order='C')
            for t in tensors
        ]

    # fpgaBlobs[0] holds the input buffers, fpgaBlobs[1] the output buffers.
    fpgaBlobs = [_alloc_blobs(inTensors), _alloc_blobs(outTensors)]

    img_paths = xdnn_io.getFilePaths(args['images'])
    labels = xdnn_io.get_labels(args['labels'])
    xdnnCPUOp = xdnn.XDNNCPUOp("%s/weights.h5" % args['vitis_rundir'])
    fcOutput = np.empty((batch_sz, args['outsz'],), dtype=np.float32,
                        order='C')

    fpgaInput = fpgaBlobs[0][0]
    for start in range(0, len(img_paths), batch_sz):
        batch_paths = img_paths[start:start + batch_sz]

        # Fill the input tensor from the image files of this batch.
        # NOTE(review): a short final batch leaves the remaining rows of
        # fpgaInput holding whatever was there before — confirm that
        # downstream consumers tolerate this.
        pl = []
        for row, path in enumerate(batch_paths):
            # shape[2] / shape[3] are presumably height and width of an
            # NCHW blob — TODO confirm.
            img, _ = xdnn_io.loadImageBlobFromFile(
                path,
                args['img_raw_scale'],
                args['img_mean'],
                args['img_input_scale'],
                fpgaInput.shape[2],
                fpgaInput.shape[3])
            pl.append(path)
            np.copyto(fpgaInput[row], img)

        # Submit the batch and block until it has completed.
        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        xdnnCPUOp.computeFC(fpgaBlobs[1][0], fcOutput)
        softmaxOut = xdnnCPUOp.computeSoftmax(fcOutput)

        if not golden:
            xdnn_io.printClassification(softmaxOut, pl, labels)
            continue

        for row, path in enumerate(batch_paths):
            top1Count += xdnn_io.isTopK(softmaxOut[row], goldenMap, path,
                                        labels, 1)
            top5Count += xdnn_io.isTopK(softmaxOut[row], goldenMap, path,
                                        labels, 5)

    if golden:
        total = len(img_paths)
        print(("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n")
              % (total,
                 float(top1Count) / float(total) * 100.,
                 float(top5Count) / float(total) * 100.))
def __init__(self):
    """Parse the command line, start the process pool and open the FPGA op.

    Side effects on module globals: ``pool`` is bound to a new ``Pool`` and
    ``pFpgaRT`` to a new ``xdnn.XDNNFPGAOp``.  On the instance it sets
    ``args``, ``batches``, ``all_image_paths`` and ``mpid``, and stores the
    batched input shape in ``args['inShape']``.
    """
    global pool
    global pFpgaRT

    # Command-line arguments; the same dict is kept on the instance.
    options = xdnn_io.processCommandLine()
    self.args = options

    # The pool is created before the FPGA handle below; keep this order.
    # 'numproc' depends on a new switch that is not added to xdnn_io.py yet.
    pool = Pool(options['numproc'])

    # Split the image paths into batches (a list of lists).
    self.batches = []
    self.all_image_paths = xdnn_io.getFilePaths(options['images'])
    step = options['batch_sz']
    paths = self.all_image_paths
    self.batches.extend(
        paths[offset:offset + step]
        for offset in range(0, len(paths), step)
    )

    # Parent process gets the handle.
    pFpgaRT = xdnn.XDNNFPGAOp(options)

    # Save the input shape for the children: the first input descriptor
    # with its leading entry replaced by the configured batch size.
    descriptors = tuple(pFpgaRT.getInputDescriptors().values())
    first_descriptor = descriptors[0]
    options['inShape'] = (options['batch_sz'],) + tuple(first_descriptor[1:])

    # Save the handle id to use in child processes.
    self.mpid = pFpgaRT.getMPID()
def main():
    """Benchmark classification throughput through the xstream pipeline.

    Starts an ``xstream.Server``, an ``FpgaMaster``, a ``Dispatcher`` and a
    ``WorkerPool`` (in that order), submits ``g_nQueries`` batches of image
    paths to the dispatcher, prints the elapsed time with the QPS and FPS
    figures derived from it, and finally releases the remaining objects.
    """
    args = xdnn_io.processCommandLine()
    images = xdnn_io.getFilePaths(args['images'])

    # start comms
    xserver = xstream.Server()

    # acquire resources
    fmaster = FpgaMaster(args['vitis_rundir'])
    inshape = list(fmaster.inshape)
    requested_batch = args['batch_sz']
    if requested_batch != -1:
        # An explicit batch size overrides the leading input dimension.
        inshape[0] = requested_batch

    # spawn dispatchers
    dispatcher = Dispatcher(g_nDispatchers, g_nWorkers, inshape)

    # spawn workers
    workers = WorkerPool(args['vitis_rundir'] + "_worker", g_nWorkers, args)

    # One work item per query: (query id, image paths, preprocessing params).
    # The modulo wraps the image index so every batch is full even when
    # there are fewer images than g_nQueries * batch size.
    per_batch = inshape[0]
    n_images = len(images)
    work = [
        (
            query,
            [images[(query * per_batch + k) % n_images]
             for k in range(per_batch)],
            (args['img_raw_scale'], args['img_mean'],
             args['img_input_scale']),
        )
        for query in range(g_nQueries)
    ]

    started = timeit.default_timer()
    dispatcher.run(work)
    # The dispatcher is dropped before the clock is stopped, so whatever
    # its teardown does is part of the measured time.
    del dispatcher
    elapsed = timeit.default_timer() - started

    qps = g_nQueries / elapsed
    fps = g_nQueries * inshape[0] / elapsed
    print("Queries: %d, Elapsed: %.2fs, QPS: %.2f, FPS: %.2f"
          % (g_nQueries, elapsed, qps, fps))

    # cleanup: targets are deleted left to right, i.e. workers first,
    # then the FPGA master, then the server.
    del workers, fmaster, xserver
top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p, labels, 1) top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p, labels, 5) else: xdnn_io.printClassification(softmaxOut, pl, labels) if args['golden']: print(("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n") % (len(img_paths), float(top1Count) / float(len(img_paths)) * 100., float(top5Count) / float(len(img_paths)) * 100.)) if __name__ == '__main__': print("\n\n\n\n\n\n\n\n" + '\33[32m' + "Running Inference with HW Pre-processing with JPEG decoder" + '\33[0m') args = xdnn_io.processCommandLine() #Create a queue for passing the pre-processed data q = mp.Queue() #Creating a process to run HW pre-processing kernel p_preprocess = mp.Process(target=pre_process, args=(q, args)) #Process to run XDNN p_xdnn = mp.Process(target=process_xdnn, args=(q, args)) p_preprocess.start() p_xdnn.start() p_preprocess.join() p_xdnn.join()