def generateLayerwiseJson(layername):
    """Write a copy of the network JSON truncated after ``layername``.

    The network description is read from the file named by the ``netcfg``
    command-line argument.  Walking the ``network`` list in order, every
    still-active node that comes after the first node named ``layername``
    (and has a different name) gets its ``active`` field set to 0.  Nodes
    that are already inactive are left untouched.

    The result is written to ``<layername>.json`` in the current working
    directory, with every ``/`` in the file name replaced by ``_``.

    Args:
        layername: Name of the last node that should remain active.

    Returns:
        The name of the JSON file that was written.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--layerindex', type=int, default=0,
                        help='Index value for layer in json', required=True)
    args = xdnn_io.make_dict_args(parser.parse_args())

    with open(args['netcfg'], 'r') as src:
        data = json.load(src)

    seen_target = False
    for node in data['network']:
        if node['active'] == 0:
            continue
        if node['name'] == layername:
            # First (or a repeated) occurrence of the requested layer.
            seen_target = True
        elif seen_target:
            # Everything past the requested layer is switched off.
            node['active'] = 0

    out_path = (str(layername) + '.json').replace('/', '_')
    with open(out_path, 'w') as dst:
        json.dump(data, dst, indent=2, sort_keys=True)

    return out_path
def main():
    """Run the multi-process image pipeline against the FPGA.

    Parses the command line, creates the xdnn manager, allocates the shared
    input/output buffers and then starts three kinds of workers:

    * a ``multiprocessing.Pool`` initialised with ``init_prepImage`` whose
      workers execute ``run_prepImage`` once per image index,
    * one process running ``fpga_process_async``,
    * one process running ``post_process``.

    ``streamQ`` is pre-filled with the stream indices ``0..numstream-1`` and
    ``prepProcQ`` with the input-buffer indices ``0..99``.

    When the ``perpetual`` argument is truthy the image list is submitted to
    the pool in an endless loop and the function never returns.  Otherwise
    the list is submitted once and the function waits for the FPGA and
    post-processing processes before shutting the pool down.

    Exits the interpreter with status 1 if ``xdnn.createManager()`` does not
    return a value equal to ``True``.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--numprepproc', type=int, default=1,
                        help='number of parallel processes used to decode and quantize images')
    parser.add_argument('--numstream', type=int, default=16,
                        help='number of FPGA streams')
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    parser.add_argument('--benchmarkmode', type=int, default=0,
                        help='bypass pre/post processing for benchmarking')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    ret = xdnn.createManager()
    # Deliberately an equality test: only a value equal to True counts as success.
    if ret != True:
        sys.exit(1)

    sharedInputArrs = []
    fpgaOutputs = []

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    num_input_buffers = 100
    qPrep = mp.Queue(maxsize=args['numprepproc'] * 10)
    qFpga = mp.Queue(maxsize=100)
    streamQ = mp.Queue(maxsize=args['numstream'])
    prepProcQ = mp.Queue(maxsize=num_input_buffers)

    # next(iter(...values())) works on Python 2 and 3; itervalues().next()
    # exists only on Python 2.
    firstOutputShape = next(iter(compilerJSONObj.getOutputs().values()))
    firstInputShape = next(iter(compilerJSONObj.getInputs().values()))

    # np.prod returns a NumPy integer.  mp.Array only treats a built-in int
    # as a size (anything else is taken as an initializer sequence), so the
    # sizes are converted explicitly.
    outputSize = int(args['batch_sz'] * np.prod(tuple(firstOutputShape[1:])))
    inputSize = int(np.prod(tuple(firstInputShape)))

    for i in range(args['numstream']):
        fpgaOutputs.append(mp.Array(ctypes.c_float, outputSize))
        streamQ.put(i)

    for i in range(num_input_buffers):
        sharedInputArrs.append(mp.Array(ctypes.c_float, inputSize))
        prepProcQ.put(i)

    img_paths = xdnn_io.getFilePaths(args['images'])

    p = mp.Pool(initializer=init_prepImage,
                initargs=(args, qPrep, img_paths, sharedInputArrs,
                          prepProcQ, compilerJSONObj,),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process_async,
                          args=(qPrep, qFpga, args, len(img_paths),
                                sharedInputArrs, prepProcQ, streamQ,
                                fpgaOutputs, compilerJSONObj,))
    xdnnProc.start()

    postProc = mp.Process(target=post_process,
                          args=(qFpga, args, img_paths, streamQ, fpgaOutputs,))
    postProc.start()

    if args['perpetual']:
        # Resubmit the whole image list forever, one pass at a time.
        while True:
            p.map_async(run_prepImage, range(len(img_paths))).wait()
    else:
        # The async result is not inspected; completion is observed through
        # the two worker processes joined below.
        p.map_async(run_prepImage, range(len(img_paths)))

    xdnnProc.join()
    postProc.join()

    p.close()
    p.join()
def __init__(self, **kwargs):
    """Build ``self.args`` from keyword arguments.

    Every ``key=value`` pair is turned into a ``--key value`` pair of
    strings, the resulting list is parsed with the parser returned by
    ``default_parser()``, and the parsed namespace is converted with
    ``xdnn_io.make_dict_args`` and stored on ``self.args``.

    The generated argument list is printed to standard output.

    Args:
        **kwargs: Option names (without the leading ``--``) mapped to their
            values; both are converted with ``str``.
    """
    arglist = []
    for key, value in kwargs.items():
        arglist.append("--" + str(key))
        arglist.append(str(value))

    # print() with one argument prints the same text on Python 2 and 3; the
    # bare ``print arglist`` statement is a SyntaxError on Python 3.
    print(arglist)

    parser = default_parser()
    args = parser.parse_args(arglist)
    self.args = xdnn_io.make_dict_args(args)
def setup(self, bottom, top):
    """Prepare the layer: parse its parameters and open the FPGA.

    ``self.param_str`` (the layer arguments taken from the prototxt) is
    evaluated into ``self.param_dict`` and converted with
    ``xdnn_io.make_dict_args`` into ``self._args``.  An FPGA handle is then
    created for the ``xclbin`` argument, the runtime object ``self.fpgaRT``
    is instantiated, and the input/output names and a compiler-JSON parser
    for ``netcfg`` are stored on the instance.

    Args:
        bottom: Not used by this method.
        top: Not used by this method.

    Raises:
        Exception: If ``xdnn.createHandle`` returns a non-zero status.
    """
    # NOTE(review): eval() executes param_str as Python code, so this is only
    # safe when the prototxt comes from a trusted source.
    self.param_dict = eval(self.param_str)

    layer_args = xdnn_io.make_dict_args(self.param_dict)
    self._args = layer_args
    # The batch size doubles as the number of PEs in the FPGA (a stand-in
    # chosen by the original author).
    self._numPE = layer_args["batch_sz"]

    # Establish FPGA communication and load the bitstream.
    status, handles = xdnn.createHandle(layer_args["xclbin"], "kernelSxdnn_0")
    if status != 0:
        raise Exception("Failed to open FPGA handle.")

    layer_args["scaleB"] = 1
    layer_args["PE"] = -1

    # Runtime interface object used to talk to the FPGA.
    self.fpgaRT = xdnn.XDNNFPGAOp(handles, layer_args)
    self._indictnames = layer_args["input_names"]
    self._outdictnames = layer_args["output_names"]
    self._parser = xdnn.CompilerJsonParser(layer_args["netcfg"])
def getCurrentLayerByIndex(index = 0):
    """Look up the name and operation of the index-th hardware layer.

    The network JSON named by the ``netcfg`` command-line argument is loaded
    and reduced to the nodes that have a truthy ``xdnn_kv`` entry and
    ``active == 1``.  The node at position ``index`` in that reduced list is
    then reported.

    Args:
        index: Position in the filtered node list.  If the parsed arguments
            contain ``layerindex``, that value replaces this one.

    Returns:
        A ``(name, op)`` tuple.  ``op`` is ``"DBL"`` when the node's
        ``xdnn_kv['slice']`` equals the string ``"0"``, otherwise the node's
        ``xdnn_kv['XNOp']``.  ``(None, None)`` is returned when ``index`` is
        at or past the end of the filtered list.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--layerindex', type=int, default=0,
                        help='Index value for layer in json', required=True)
    args = xdnn_io.make_dict_args(parser.parse_args())

    # A layer index given on the command line takes precedence.
    if 'layerindex' in args:
        index = args['layerindex']

    with open(args['netcfg']) as cfg:
        network = json.load(cfg)['network']

    # Keep only the nodes that run in hardware and are active.
    hw_nodes = [entry for entry in network
                if entry['xdnn_kv'] and entry['active'] == 1]

    if index >= len(hw_nodes):
        return None, None

    chosen = hw_nodes[index]
    kv = chosen['xdnn_kv']
    if kv['slice'] == "0":
        return chosen['name'], "DBL"
    return chosen['name'], kv['XNOp']
default=False, help='batch size') parser.add_argument('--cutAfter', default="", help='Node in graph to start cutting after') return parser if __name__ == "__main__": # Run parser = default_parser() args = parser.parse_args() args = xdnn_io.make_dict_args(args) netpb = caffe_pb2.NetParameter() with open(args["inproto"], "r") as f: pbtf.Parse(f.read(), netpb) srctensor1 = args["cutAfter"] outpb = cut_subgraph(netpb, srctensor1, "subgraph0", args) with open(args["outproto"], "w") as f: f.write(str(outpb)) # if user passes the train_val prototxt, we will steal the data and accuracy layers if (args["trainproto"]): trainpb = caffe_pb2.NetParameter()
def networkForward(netcfg, layername):
    """Execute the network described by ``netcfg`` once and return its run time.

    The command line is parsed for the remaining settings, then ``netcfg``
    is forced to the given file, ``batch_sz`` to 1 and ``PE`` to 0 so that
    only a single PE is measured.  The first image found under the
    ``images`` argument is loaded into the first input buffer, the network
    is executed, and the value reported by ``fpgaRT.get_exec_time()`` is
    returned.

    The FPGA handle is always closed before the function returns or raises.

    Args:
        netcfg: Path to the network JSON to run.
        layername: Not used by this function; kept so callers need not change.

    Returns:
        The accumulated ``get_exec_time()`` value divided by the number of
        iterations (currently 1).

    Exits the interpreter with status 1 if the FPGA handle cannot be created.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--layerindex', type=int, default=0,
                        help='Index value for layer in json', required=True)
    args = xdnn_io.make_dict_args(parser.parse_args())

    args['netcfg'] = netcfg
    # Hardcode these parameters, so we only have to look at performance of 1 PE
    args["batch_sz"] = 1
    args["PE"] = 0

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    # The handle is open from here on; try/finally guarantees it is released.
    # The old cleanup sat after ``return`` and never ran.
    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fpgaOutput = fpgaRT.getOutputs()
        fpgaInput = fpgaRT.getInputs()

        img_paths = xdnn_io.getFilePaths(args['images'])
        inShape = (args['batch_sz'],) + tuple(
            tuple(fpgaRT.getInputDescriptors().values())[0][1:])
        firstInput = list(fpgaInput.values())[0]

        # Only the first image is ever used as input (batch size is fixed at
        # 1 and slot 0 is the only slot written), so it is loaded once.
        if len(img_paths) > 0:
            firstInput[0, ...], _unused = xdnn_io.loadImageBlobFromFile(
                img_paths[0], args['img_raw_scale'], args['img_mean'],
                args['img_input_scale'], inShape[2], inShape[3])

        # How many iterations to run, and average across
        iterations = 1
        total_time = 0.0
        for _iteration in range(iterations):
            fpgaRT.execute(fpgaInput, fpgaOutput)
            total_time += fpgaRT.get_exec_time()

        return total_time / iterations
    finally:
        xdnn.closeHandle()
def main():
    """Run the multi-process image pipeline against the FPGA.

    Parses the command line, creates the xdnn manager from the ``xlnxlib``
    argument, allocates the shared input/output buffers and then starts
    three kinds of workers:

    * a ``multiprocessing.Pool`` initialised with ``init_prepImage`` whose
      workers execute ``run_prepImage`` once per image index,
    * one process running ``fpga_process_async``,
    * one process running ``post_process``.

    ``streamQ`` is pre-filled with the stream indices ``0..numstream-1`` and
    ``prepProcQ`` with the input-buffer indices ``0..99``.

    When the ``perpetual`` argument is truthy the image list is submitted to
    the pool in an endless loop and the function never returns.  Otherwise
    the list is submitted once and the function waits for the FPGA and
    post-processing processes before shutting the pool down.

    Exits the interpreter with status 1 if ``xdnn.createManager`` does not
    return a value equal to ``True``.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--numprepproc', type=int, default=1,
                        help='number of parallel processes used to decode and quantize images')
    parser.add_argument('--numstream', type=int, default=16,
                        help='number of FPGA streams')
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    ret = xdnn.createManager(args['xlnxlib'])
    # Deliberately an equality test: only a value equal to True counts as success.
    if ret != True:
        sys.exit(1)

    sharedInputArrs = []
    fpgaOutputs = []

    num_input_buffers = 100
    qPrep = mp.Queue(maxsize=args['numprepproc'] * 10)
    qFpga = mp.Queue(maxsize=100)
    streamQ = mp.Queue(maxsize=args['numstream'])
    prepProcQ = mp.Queue(maxsize=num_input_buffers)

    for i in range(args['numstream']):
        shared_arr = mp.Array(ctypes.c_float,
                              args['batch_sz'] * args['fpgaoutsz'])
        fpgaOutputs.append(shared_arr)
        streamQ.put(i)

    # np.prod returns a NumPy integer.  mp.Array only treats a built-in int
    # as a size (anything else is taken as an initializer sequence), so the
    # size is converted explicitly and computed once.
    bufSize = int(np.prod(args['in_shape']))
    for i in range(num_input_buffers):
        sharedInputArrs.append(mp.Array(ctypes.c_float, bufSize))
        prepProcQ.put(i)

    img_paths = xdnn_io.getFilePaths(args['images'])

    p = mp.Pool(initializer=init_prepImage,
                initargs=(args, qPrep, img_paths, sharedInputArrs, prepProcQ,),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process_async,
                          args=(qPrep, qFpga, args, len(img_paths),
                                sharedInputArrs, prepProcQ, streamQ,
                                fpgaOutputs,))
    xdnnProc.start()

    postProc = mp.Process(target=post_process,
                          args=(qFpga, args, img_paths, streamQ, fpgaOutputs,))
    postProc.start()

    if args['perpetual']:
        # Resubmit the whole image list forever, one pass at a time.
        while True:
            p.map_async(run_prepImage, range(len(img_paths))).wait()
    else:
        # The async result is not inspected; completion is observed through
        # the two worker processes joined below.
        p.map_async(run_prepImage, range(len(img_paths)))

    xdnnProc.join()
    postProc.join()

    p.close()
    p.join()