def __init__(self, **kwargs):
    """Build ``self.args`` from keyword arguments.

    Every ``key=value`` pair is converted into the two command-line
    tokens ``--key`` and ``str(value)``.  The resulting token list is
    parsed with the parser returned by ``default_parser()`` and the parsed
    result is passed through ``xdnn_io.make_dict_args`` before being
    stored on ``self.args``.

    Args:
        **kwargs: option names mapped to their values; both are
            stringified before parsing.
    """
    arglist = []
    for key, value in kwargs.items():
        arglist.extend(("--" + str(key), str(value)))

    # Call form of print: identical output for a single argument on
    # Python 2, and valid syntax on Python 3 (the statement form is not).
    print(arglist)

    parser = default_parser()
    args = parser.parse_args(arglist)
    self.args = xdnn_io.make_dict_args(args)
def fpga_init():
    """Parse the command line, open the FPGA and allocate output buffers.

    Reads the network description named by ``args['netcfg']``, overwrites
    the first dimension of every input/output shape with
    ``args['batch_sz']``, creates the FPGA handle for ``--deviceID`` and
    allocates one set of float32 output buffers per stream
    (``N_STREAMS`` sets in total).

    Returns:
        tuple: ``(fpgaRT, output_buffers, input_shape_by_name, fcWeight,
        fcBias)`` where ``output_buffers`` is a list of
        ``{output_node_name: ndarray}`` dicts and ``input_shape_by_name``
        maps each input node name to its shape.

    Raises:
        Exception: if the manager or the FPGA handle cannot be created.
    """
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes.  ``values()`` / ``keys()`` work on both
    # Python 2 and Python 3 (``itervalues`` / ``iterkeys`` are Python 2
    # only).  The shape objects are the ones handed out by the parser and
    # are updated in place, exactly as before.
    input_shapes = list(compilerJSONObj.getInputs().values())
    output_shapes = list(compilerJSONObj.getOutputs().values())
    for shape in input_shapes:
        shape[0] = args['batch_sz']
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in compilerJSONObj.getInputs().keys()]
    output_node_names = [str(name) for name in compilerJSONObj.getOutputs().keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                     [args["deviceID"]])
    if ret != 0:
        raise Exception(
            "Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)

    # One dict of uninitialised float32 arrays per stream.
    output_buffers = [
        {name: np.empty(shape=shape, dtype=np.float32)
         for name, shape in zip(output_node_names, output_shapes)}
        for _ in range(N_STREAMS)
    ]

    # Disabled in the original source:
    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)
    input_shape_by_name = {
        name: shape for name, shape in zip(input_node_names, input_shapes)
    }
    return fpgaRT, output_buffers, input_shape_by_name, fcWeight, fcBias
def setup(self, bottom, top):
    """Configure the layer from ``self.param_str`` and open the FPGA.

    Evaluates ``self.param_str`` into ``self.param_dict``, normalises it
    with ``xdnn_io.make_dict_args``, creates the FPGA handle for the
    configured ``xclbin`` and instantiates the runtime object and the
    compiler JSON parser.

    Args:
        bottom: not used by this method.
        top: not used by this method.

    Raises:
        Exception: if ``xdnn.createHandle`` returns a non-zero status.
    """
    # Arguments come from the prototxt parameter string.
    # NOTE(review): eval() executes whatever expression param_str holds;
    # only safe while the prototxt is trusted.
    self.param_dict = eval(self.param_str)
    cfg = xdnn_io.make_dict_args(self.param_dict)
    self._args = cfg

    # Hack carried over from the original: the batch size is used as the
    # number of PEs in the FPGA.
    self._numPE = cfg["batch_sz"]

    # Establish FPGA communication, load bitstream.
    status, fpga_handles = xdnn.createHandle(cfg["xclbin"], "kernelSxdnn_0")
    if status != 0:
        raise Exception("Failed to open FPGA handle.")

    # These two entries are set before the runtime object is built from
    # the same dictionary.
    cfg["scaleB"] = 1
    cfg["PE"] = -1

    # Instantiate runtime interface object.
    self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, cfg)
    self._indictnames = cfg["input_names"]
    self._outdictnames = cfg["output_names"]
    self._parser = xdnn.CompilerJsonParser(cfg["netcfg"])
def __init__(self, params):
    """Open the FPGA and build the runtime objects described by *params*.

    Args:
        params: argument container accepted by
            ``xdnn_io.make_dict_args``; the resulting dictionary must
            provide ``batch_sz``, ``xclbin``, ``input_names``,
            ``output_names`` and ``netcfg``.

    Raises:
        Exception: if ``xdnn.createHandle`` returns a non-zero status.
    """
    cfg = xdnn_io.make_dict_args(params)
    self._args = cfg

    # Hack carried over from the original: the batch size is used as the
    # number of PEs in the FPGA.
    self._numPE = cfg["batch_sz"]

    # Establish FPGA communication, load bitstream.
    status, fpga_handles = xdnn.createHandle(cfg["xclbin"], "kernelSxdnn_0")
    if status != 0:
        raise Exception("Failed to open FPGA handle.")

    # These two entries are set before the runtime object is built from
    # the same dictionary.
    cfg["scaleB"] = 1
    cfg["PE"] = -1

    # Allow 8 streams (ids 0..7).
    self._streamIds = list(range(8))

    # Instantiate runtime interface object.
    self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, cfg)
    self._indictnames = cfg["input_names"]
    self._outdictnames = cfg["output_names"]
    self._parser = xdnn.CompilerJsonParser(cfg["netcfg"])
type=int, default=5, help='thresohold on iouthresh across 2 candidate detections') parser.add_argument( '--detection_labels', help="direcotry path detected lable files in darknet style", default=None, type=str, metavar="FILE") parser.add_argument('--prob_threshold', type=float, default=0.1, help='threshold for calculation of f1 score') args = parser.parse_args() args = xdnn_io.make_dict_args(args) compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg']) firstInputShape = compilerJSONObj.getInputs().itervalues().next() firstOutputShape = compilerJSONObj.getOutputs().itervalues().next() out_w = firstOutputShape[2] out_h = firstOutputShape[3] args['net_w'] = int(firstInputShape[2]) args['net_h'] = int(firstInputShape[3]) args['out_w'] = int(out_w) args['out_h'] = int(out_h) args['coords'] = 4 args['beginoffset'] = (args['coords'] + 1) * int(out_w * out_h) args['groups'] = int(out_w * out_h) args['batchstride'] = args['groups'] * (args['outsz'] + args['coords'] + 1) args['groupstride'] = 1
def fpga_init():
    """Parse the command line, open the FPGA devices and allocate buffers.

    Side effects on module globals: ``PORT`` is set from ``--port`` and
    ``N_STREAMS`` is multiplied by the number of ``--device-ids`` given.

    The first dimension of every input/output shape read from
    ``args['netcfg']`` is overwritten with ``args['batch_sz']``, and one
    set of float32 output buffers is allocated per stream.

    Returns:
        tuple: ``(fpgaRT, output_buffers, first_output_node_name,
        input_shape_by_name, fcWeight, fcBias, batch_sz)``.

    Raises:
        Exception: if the manager or the FPGA handle cannot be created.
    """
    global PORT
    global N_STREAMS

    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--device-ids', type=int, default=[0], nargs="+",
                        help='a list of device IDs for FPGA')
    parser.add_argument('--port', type=int, default=5000,
                        help='port to listen on')
    args = parser.parse_args()
    device_ids = args.device_ids
    PORT = args.port
    # The stream count scales with the number of devices in use.
    N_STREAMS *= len(device_ids)
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes.  ``values()`` / ``keys()`` work on both
    # Python 2 and Python 3 (``itervalues`` / ``iterkeys`` are Python 2
    # only).  The shape objects are the ones handed out by the parser and
    # are updated in place, exactly as before.
    input_shapes = list(compilerJSONObj.getInputs().values())
    output_shapes = list(compilerJSONObj.getOutputs().values())
    for shape in input_shapes:
        shape[0] = args['batch_sz']
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in compilerJSONObj.getInputs().keys()]
    output_node_names = [str(name) for name in compilerJSONObj.getOutputs().keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                     device_ids)
    if ret != 0:
        raise Exception(
            "Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)
    print("Using model {path}".format(path=args["netcfg"]))
    print("Using FPGA device:", device_ids)

    # One dict of uninitialised float32 arrays per stream.
    output_buffers = [
        {name: np.empty(shape=shape, dtype=np.float32)
         for name, shape in zip(output_node_names, output_shapes)}
        for _ in range(N_STREAMS)
    ]

    # Disabled in the original source:
    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)
    input_shape_by_name = {
        name: shape for name, shape in zip(input_node_names, input_shapes)
    }
    return (fpgaRT, output_buffers, output_node_names[0],
            input_shape_by_name, fcWeight, fcBias, args['batch_sz'])
def run(args=None):
    """Run the multi-process pipeline: pre-process, FPGA, post-process.

    A pool of ``numprepproc`` workers (initialised with
    ``init_pre_process``) runs ``run_pre_process`` for every image index,
    one process runs ``fpga_process`` and one runs ``post_process``.  Two
    ``SharedMemoryQueue`` objects ("trans" and "output") are created and
    handed to these stages.

    Args:
        args: argument dictionary.  When falsy, the command line is
            parsed instead (adding ``--numprepproc``, ``--numstream``,
            ``--deviceID`` and ``--benchmarkmode`` to the default parser).

    Note:
        When ``args['perpetual']`` is truthy the pre-processing loop never
        terminates, so the joins below it are not reached.
        Exits the interpreter with status 1 if ``xdnn.createManager()``
        fails.
    """
    if not args:
        parser = xdnn_io.default_parser_args()
        parser.add_argument(
            '--numprepproc', type=int, default=1,
            help='number of parallel processes used to decode and quantize images')
        parser.add_argument('--numstream', type=int, default=16,
                            help='number of FPGA streams')
        parser.add_argument(
            '--deviceID', type=int, default=0,
            help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
        parser.add_argument('--benchmarkmode', type=int, default=0,
                            help='bypass pre/post processing for benchmarking')
        args = parser.parse_args()
        # NOTE(review): normalisation is applied to command-line arguments
        # only; callers passing ``args`` are assumed to supply an already
        # normalised dictionary - confirm against the original layout.
        args = xdnn_io.make_dict_args(args)

    if not xdnn.createManager():
        sys.exit(1)

    fpgaRT = None
    fpgaOutputs = []

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Materialise the shapes as real lists: they are measured with len(),
    # indexed and concatenated below, which fails on the lazy ``map``
    # object Python 3 returns.  ``values()`` exists on both Python 2 and 3.
    input_shapes = list(compilerJSONObj.getInputs().values())
    output_shapes = list(compilerJSONObj.getOutputs().values())

    # Only the output shapes get their first dimension overwritten here.
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    num_shared_slots = args['numstream']

    # shared memory from preprocessing to fpga forward
    # (the extra entry is the bare integer 4, as in the original ``[(4)]``)
    shared_trans_arrs = SharedMemoryQueue(
        "trans",
        num_shared_slots * (args['numprepproc'] * args['batch_sz']),
        input_shapes + [4])

    # shared memory from fpga forward to postprocessing
    shared_output_arrs = SharedMemoryQueue(
        "output", num_shared_slots,
        output_shapes + [(args['batch_sz'], 4)])

    img_paths = xdnn_io.getFilePaths(args['images'])

    p = mp.Pool(initializer=init_pre_process,
                initargs=(args, img_paths, input_shapes, shared_trans_arrs),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process,
                          args=(fpgaRT, args, len(img_paths), compilerJSONObj,
                                shared_trans_arrs, shared_output_arrs))

    postProc = mp.Process(target=post_process,
                          args=(args, img_paths, fpgaOutputs, output_shapes,
                                shared_output_arrs))

    xdnnProc.start()
    postProc.start()

    image_indices = range(len(img_paths))
    if args['perpetual']:
        # Re-submit the whole image set forever, waiting for each pass to
        # finish before starting the next one.
        while True:
            p.map_async(run_pre_process, image_indices).wait()
    else:
        p.map_async(run_pre_process, image_indices)

    xdnnProc.join()
    postProc.join()

    p.close()
    p.join()