예제 #1
0
 def __init__(self, **kwargs):
     """Build ``self.args`` from keyword arguments.

     Every ``key=value`` pair is converted into the two command-line tokens
     ``--key`` and ``str(value)``. The resulting token list is parsed with
     the parser returned by ``default_parser()`` and the parsed namespace is
     converted with ``xdnn_io.make_dict_args`` before being stored.

     Args:
         **kwargs: Option names (without the leading ``--``) mapped to
             their values. Values are stringified with ``str()``.
     """
     arglist = []
     for k, v in kwargs.items():
         arglist.extend(("--" + str(k), str(v)))
         # Trace of the token list after each option is appended (kept from
         # the original). Written as a call so the module also parses on
         # Python 3; with a single argument the output is the same on
         # Python 2.
         print(arglist)
     parser = default_parser()
     args = parser.parse_args(arglist)
     self.args = xdnn_io.make_dict_args(args)
예제 #2
0
def fpga_init():
    """Parse command-line arguments, open the FPGA and allocate output buffers.

    Reads the module-level ``N_STREAMS`` to decide how many output buffer
    dictionaries to allocate.

    Returns:
        A 5-tuple ``(fpgaRT, output_buffers, input_shapes_by_name, fcWeight,
        fcBias)`` where ``output_buffers`` is a list of ``N_STREAMS`` dicts
        mapping each output node name to an uninitialised ``float32`` array,
        and ``input_shapes_by_name`` maps each input node name to its shape.

    Raises:
        Exception: If the xdnn manager or the FPGA handle cannot be created.
    """
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    args = xdnn_io.make_dict_args(parser.parse_args())

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes / node names.
    # keys()/values() are used instead of the Python-2-only
    # iterkeys()/itervalues() so this works on both Python 2 and 3. Names
    # and shapes are taken from the same mapping object, so they line up.
    inputs = compilerJSONObj.getInputs()
    outputs = compilerJSONObj.getOutputs()
    input_shapes = list(inputs.values())
    output_shapes = list(outputs.values())

    # Overwrite the leading dimension of every shape with the batch size.
    # The shape objects are modified in place, exactly as before.
    for shape in input_shapes + output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in inputs.keys()]
    output_node_names = [str(name) for name in outputs.keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", [args["deviceID"]])
    if ret != 0:
        raise Exception("Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)

    # One dict of freshly allocated output arrays per stream.
    output_buffers = [
        {name: np.empty(shape=shape, dtype=np.float32)
         for name, shape in zip(output_node_names, output_shapes)}
        for _ in range(N_STREAMS)
    ]

    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)
    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    return fpgaRT, output_buffers,\
        dict(zip(input_node_names, input_shapes)),\
        fcWeight, fcBias
예제 #3
0
    def setup(self, bottom, top):
        """Configure the object from ``self.param_str`` and open the FPGA.

        ``bottom`` and ``top`` are accepted but not used here (the signature
        presumably follows a Caffe Python-layer ``setup`` hook; confirm
        against the enclosing class).

        Raises:
            Exception: If ``xdnn.createHandle`` returns a non-zero status.
        """
        # SECURITY NOTE(review): eval() executes whatever expression is in
        # param_str. That is only acceptable while the prototxt is trusted;
        # consider ast.literal_eval if the parameters are plain literals.
        self.param_dict = eval(self.param_str)  # Get args from prototxt
        cfg = xdnn_io.make_dict_args(self.param_dict)
        self._args = cfg

        # Original author's note: batch_sz is used as a stand-in for the
        # number of PEs in the FPGA (explicitly marked as a hack).
        self._numPE = cfg["batch_sz"]

        # Establish FPGA communication and load the bitstream.
        status, fpga_handles = xdnn.createHandle(cfg["xclbin"],
                                                 "kernelSxdnn_0")
        if status != 0:
            raise Exception("Failed to open FPGA handle.")

        # These two settings are injected before the runtime is built.
        cfg["scaleB"] = 1
        cfg["PE"] = -1

        # Instantiate the runtime interface object.
        self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, cfg)
        self._indictnames = cfg["input_names"]
        self._outdictnames = cfg["output_names"]
        self._parser = xdnn.CompilerJsonParser(cfg["netcfg"])
예제 #4
0
    def __init__(self, params):
        """Open the FPGA and build the runtime from ``params``.

        Args:
            params: Argument container accepted by
                ``xdnn_io.make_dict_args``. The resulting mapping must
                provide ``batch_sz``, ``xclbin``, ``input_names``,
                ``output_names`` and ``netcfg``.

        Raises:
            Exception: If ``xdnn.createHandle`` returns a non-zero status.
        """
        cfg = xdnn_io.make_dict_args(params)
        self._args = cfg

        # Original author's note: batch_sz is used as a stand-in for the
        # number of PEs in the FPGA (explicitly marked as a hack).
        self._numPE = cfg["batch_sz"]

        # Establish FPGA communication and load the bitstream.
        status, fpga_handles = xdnn.createHandle(cfg["xclbin"],
                                                 "kernelSxdnn_0")
        if status != 0:
            raise Exception("Failed to open FPGA handle.")

        # These two settings are injected before the runtime is built.
        cfg["scaleB"] = 1
        cfg["PE"] = -1
        self._streamIds = list(range(8))  # Allow 8 streams (ids 0..7)

        # Instantiate the runtime interface object.
        self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, cfg)
        self._indictnames = cfg["input_names"]
        self._outdictnames = cfg["output_names"]
        self._parser = xdnn.CompilerJsonParser(cfg["netcfg"])
예제 #5
0
        type=int,
        default=5,
        help='thresohold on iouthresh across 2 candidate detections')
    parser.add_argument(
        '--detection_labels',
        help="direcotry path detected lable files in darknet style",
        default=None,
        type=str,
        metavar="FILE")
    parser.add_argument('--prob_threshold',
                        type=float,
                        default=0.1,
                        help='threshold for calculation of f1 score')

    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)
    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
    firstInputShape = compilerJSONObj.getInputs().itervalues().next()
    firstOutputShape = compilerJSONObj.getOutputs().itervalues().next()
    out_w = firstOutputShape[2]
    out_h = firstOutputShape[3]

    args['net_w'] = int(firstInputShape[2])
    args['net_h'] = int(firstInputShape[3])
    args['out_w'] = int(out_w)
    args['out_h'] = int(out_h)
    args['coords'] = 4
    args['beginoffset'] = (args['coords'] + 1) * int(out_w * out_h)
    args['groups'] = int(out_w * out_h)
    args['batchstride'] = args['groups'] * (args['outsz'] + args['coords'] + 1)
    args['groupstride'] = 1
예제 #6
0
def fpga_init():
    """Parse command-line arguments, open the FPGA(s) and allocate buffers.

    Side effects on module globals:
        * ``PORT`` is set from ``--port``.
        * ``N_STREAMS`` is multiplied by the number of device IDs. This
          happens on every call, so calling the function more than once
          compounds the factor.

    Returns:
        A 7-tuple ``(fpgaRT, output_buffers, first_output_node_name,
        input_shapes_by_name, fcWeight, fcBias, batch_sz)`` where
        ``output_buffers`` is a list of ``N_STREAMS`` dicts mapping each
        output node name to an uninitialised ``float32`` array.

    Raises:
        Exception: If the xdnn manager or the FPGA handle cannot be created.
    """
    global PORT
    global N_STREAMS
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--device-ids',
                        type=int,
                        default=[0],
                        nargs="+",
                        help='a list of device IDs for FPGA')
    parser.add_argument('--port',
                        type=int,
                        default=5000,
                        help='port to listen on')
    args = parser.parse_args()
    # Read the namespace attributes before it is converted to a dict.
    device_ids = args.device_ids
    PORT = args.port
    N_STREAMS *= len(device_ids)
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes / node names.
    # keys()/values() are used instead of the Python-2-only
    # iterkeys()/itervalues() so this works on both Python 2 and 3. Names
    # and shapes are taken from the same mapping object, so they line up.
    inputs = compilerJSONObj.getInputs()
    outputs = compilerJSONObj.getOutputs()
    input_shapes = list(inputs.values())
    output_shapes = list(outputs.values())

    # Overwrite the leading dimension of every shape with the batch size.
    # The shape objects are modified in place, exactly as before.
    for shape in input_shapes + output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in inputs.keys()]
    output_node_names = [str(name) for name in outputs.keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                     device_ids)
    if ret != 0:
        raise Exception(
            "Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)
    print("Using model {path}".format(path=args["netcfg"]))
    print("Using FPGA device:", device_ids)

    # One dict of freshly allocated output arrays per stream.
    output_buffers = [
        {
            name: np.empty(shape=shape, dtype=np.float32)
            for name, shape in zip(output_node_names, output_shapes)
        }
        for _ in range(N_STREAMS)
    ]

    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)
    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    return fpgaRT, output_buffers, output_node_names[0],\
        dict(zip(input_node_names, input_shapes)),\
        fcWeight, fcBias, args['batch_sz']
예제 #7
0
def run(args=None):
    """Run the pre-process / FPGA / post-process pipeline over the images.

    A pool of pre-processing workers feeds a shared-memory queue that the
    FPGA process consumes; the FPGA process writes into a second queue that
    the post-processing process consumes.

    Args:
        args: Argument mapping. When falsy, arguments are parsed from the
            command line using ``xdnn_io.default_parser_args()`` extended
            with ``--numprepproc``, ``--numstream``, ``--deviceID`` and
            ``--benchmarkmode``.

    Exits the interpreter with status 1 if the xdnn manager cannot be
    created. When ``args['perpetual']`` is truthy the function loops forever
    and never returns.
    """
    if not args:
        parser = xdnn_io.default_parser_args()
        parser.add_argument(
            '--numprepproc',
            type=int,
            default=1,
            help=
            'number of parallel processes used to decode and quantize images')
        parser.add_argument('--numstream',
                            type=int,
                            default=16,
                            help='number of FPGA streams')
        parser.add_argument(
            '--deviceID',
            type=int,
            default=0,
            help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
        parser.add_argument('--benchmarkmode',
                            type=int,
                            default=0,
                            help='bypass pre/post processing for benchmarking')
        args = xdnn_io.make_dict_args(parser.parse_args())

    if not xdnn.createManager():
        sys.exit(1)

    # No runtime object is created in this process; None is handed to
    # fpga_process as its first argument.
    fpgaRT = None
    fpgaOutputs = []

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Materialise real lists: the code below needs len(), indexing and
    # list concatenation, which a Python 3 map() iterator does not support.
    # values() replaces the Python-2-only itervalues().
    input_shapes = list(compilerJSONObj.getInputs().values())
    output_shapes = list(compilerJSONObj.getOutputs().values())

    # Only the output shapes get their leading dimension replaced by the
    # batch size; input shapes are used as reported by the parser.
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    num_shared_slots = args['numstream']

    # shared memory from preprocessing to fpga forward
    # NOTE(review): the extra entry is the plain int 4 (the original wrote
    # `(4)`, which is not a 1-tuple) - confirm this is what
    # SharedMemoryQueue expects.
    shared_trans_arrs = SharedMemoryQueue(
        "trans", num_shared_slots * (args['numprepproc'] * args['batch_sz']),
        input_shapes + [4])
    # shared memory from fpga forward to postprocessing
    shared_output_arrs = SharedMemoryQueue(
        "output", num_shared_slots, output_shapes + [(args['batch_sz'], 4)])

    img_paths = xdnn_io.getFilePaths(args['images'])
    num_images = len(img_paths)

    p = mp.Pool(initializer=init_pre_process,
                initargs=(
                    args,
                    img_paths,
                    input_shapes,
                    shared_trans_arrs,
                ),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process,
                          args=(
                              fpgaRT,
                              args,
                              num_images,
                              compilerJSONObj,
                              shared_trans_arrs,
                              shared_output_arrs,
                          ))

    postProc = mp.Process(target=post_process,
                          args=(
                              args,
                              img_paths,
                              fpgaOutputs,
                              output_shapes,
                              shared_output_arrs,
                          ))
    xdnnProc.start()
    postProc.start()

    if args['perpetual']:
        # Re-submit the whole image set forever, waiting for each pass to
        # finish before starting the next one.
        while True:
            p.map_async(run_pre_process, range(num_images)).wait()
    else:
        # Fire and forget: completion is awaited by p.close()/p.join()
        # below, after the FPGA and post-processing processes finish.
        p.map_async(run_pre_process, range(num_images))

    xdnnProc.join()
    postProc.join()

    p.close()
    p.join()