示例#1
0
  def __init__(self, param_str):
    """Open the FPGA, create the runtime object and allocate I/O buffers.

    Args:
      param_str: String holding a Python expression that evaluates to the
        argument collection handed to ``xdnn_io.make_dict_args`` (the
        original comment says it comes from the prototxt).

    Raises:
      Exception: If ``xdnn.createHandle`` returns a non-zero status.
    """
    self.input_mean_value_ = 128.0
    self.input_scale_ = 1.0

    # SECURITY NOTE(review): eval() executes arbitrary code contained in
    # param_str. Kept as-is because the accepted syntax is not visible here;
    # consider ast.literal_eval if the string is always a plain literal.
    param_dict = eval(param_str)  # Get args from prototxt

    self._args = xdnn_io.make_dict_args(param_dict)
    # Bryan hack to determine number of PEs in FPGA
    self._numPE = self._args["batch_sz"]

    # Establish FPGA Communication, Load bitstream
    ret, handles = xdnn.createHandle(self._args["xclbin"], "kernelSxdnn_0")
    if ret != 0:
      raise Exception("Failed to open FPGA handle.")

    self._args["scaleB"] = 1
    self._args["PE"] = -1

    # Instantiate runtime interface object
    self._fpgaRT = xdnn.XDNNFPGAOp(handles, self._args)

    self._parser = xdnn.CompilerJsonParser(self._args["netcfg"])

    self._indictnames = self._parser.getInputs()
    self._outdictnames = self._parser.getOutputs()

    # Materialise the shapes as real lists: dict.itervalues() does not exist
    # on Python 3 and a Python 3 map object cannot be indexed. values() plus a
    # list comprehension behaves the same on Python 2 and 3.
    input_shapes = [tuple(shape) for shape in self._parser.getInputs().values()]
    output_shapes = [tuple(shape) for shape in self._parser.getOutputs().values()]

    # One uninitialised float32 buffer per named input / output. Names and
    # shapes are paired by iteration order, exactly as the index-based
    # original did.
    self._indict = {
        name: np.empty(shape, dtype=np.float32)
        for name, shape in zip(self._indictnames, input_shapes)
    }
    self._outdict = {
        name: np.empty(shape, dtype=np.float32)
        for name, shape in zip(self._outdictnames, output_shapes)
    }
示例#2
0
def main():
    """Push batched image queries through the dispatcher/worker pipeline.

    Parses the command line, brings up the xstream server, the FPGA master,
    the dispatcher and the worker pool, submits one query per batch of
    images, prints throughput figures and, when ``--golden`` is set, calls
    ``calc_detector_mAP`` on the results directory.
    """
    cli = yolo_parser_args(xdnn_io.default_parser_args())
    args = xdnn_io.make_dict_args(cli.parse_args())

    num_dispatchers = args['numprepproc']
    num_workers = args['numworkers']

    # Setup the environment
    images = xdnn_io.getFilePaths(args['images'])
    if args['golden'] or args['visualize']:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args['results_dir'], (
            "For accuracy measurements, provide --results_dir to save the detections.")

    # start comms
    xserver = xstream.Server()

    # acquire resources
    fmaster = FpgaMaster(args['vitis_rundir'])

    # update batch size: -1 means "keep the batch dimension the master reports"
    inshape = list(fmaster.inshape)
    if args['batch_sz'] != -1:
        inshape[0] = args['batch_sz']
    batch = inshape[0]

    args['net_h'], args['net_w'] = inshape[2], inshape[3]

    # spawn dispatchers
    dispatcher = yoloDispatcher(num_dispatchers, num_workers, inshape)

    # spawn workers
    workers = yoloWorkerPool(args['vitis_rundir'] + "_worker", num_workers,
                             args)

    # send work to system: one (query id, image paths, preprocessing params)
    # tuple per query. The modulo wraps around to the start of the image list
    # so that every batch, including the last one, is completely filled.
    num_queries = int(np.ceil(len(images) / batch))
    work = [
        (
            query,
            [images[(query * batch + offset) % len(images)]
             for offset in range(batch)],
            (args['img_raw_scale'], args['img_mean'], args['img_input_scale']),
        )
        for query in range(num_queries)
    ]

    began = timeit.default_timer()
    dispatcher.run(work)
    del dispatcher
    elapsed = timeit.default_timer() - began

    print("Queries: %d, Elapsed: %.2fs, QPS: %.2f, FPS: %.2f"
          % (num_queries, elapsed, num_queries / elapsed,
             num_queries * batch / elapsed))
    sys.stdout.flush()

    # cleanup (same release order as before: workers, master, server)
    del workers
    del fmaster
    del xserver

    # mAP calculation
    if not args['golden']:
        return

    print()
    print("Computing mAP score  : ")
    labels = xdnn_io.get_labels(args['labels'])
    print("Class names are  : {} ".format(labels))
    calc_detector_mAP(args['results_dir'], args['golden'],
                      len(labels), labels, args['prob_threshold'],
                      args['mapiouthresh'], args['points'])
    sys.stdout.flush()
示例#3
0
def run(args=None):
  """Run the pre-process -> FPGA -> post-process pipeline over the images.

  A pool of pre-processing workers, one FPGA process and one post-processing
  process are started; they exchange data through two SharedMemoryQueue
  objects.

  Args:
    args: Option dictionary. When falsy, options are parsed from the command
      line instead.
  """
  if not args:
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--numprepproc', type=int, default=1,
                        help='number of parallel processes used to decode and quantize images')
    parser.add_argument('--numstream', type=int, default=16,
                        help='number of FPGA streams')
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    parser.add_argument('--benchmarkmode', type=int, default=0,
                        help='bypass pre/post processing for benchmarking')
    parser.add_argument('--profile', action='store_true',
                        help='Print average latencies for preproc/exec/postproc')

    args = xdnn_io.make_dict_args(parser.parse_args())

  batch_sz = args['batch_sz']
  fpgaOutputs = []

  compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

  input_shapes = list(compilerJSONObj.getInputs().values())
  output_shapes = list(compilerJSONObj.getOutputs().values())

  # Overwrite the leading dimension of every shape with the requested batch
  # size. The shape objects are modified in place, as in the original code.
  for shape in input_shapes:
    shape[0] = batch_sz
  for shape in output_shapes:
    shape[0] = batch_sz

  num_shared_slots = args['numstream']

  # shared memory from preprocessing to fpga forward
  shared_trans_arrs = SharedMemoryQueue(
      "trans", num_shared_slots * (args['numprepproc'] * batch_sz),
      input_shapes + [(batch_sz, 4)])

  # shared memory from fpga forward to postprocessing
  shared_output_arrs = SharedMemoryQueue(
      "output", num_shared_slots, output_shapes + [(batch_sz, 4)])

  # Form list of images to chunks of batch_sz
  img_paths = xdnn_io.getFilePaths(args['images'])
  imgids = list(range(len(img_paths)))
  imgid_chunks = [imgids[i:i + batch_sz]
                  for i in range(0, len(img_paths), batch_sz)]

  # Start all processes
  p = mp.Pool(initializer=init_pre_process,
              initargs=(args, img_paths, input_shapes, shared_trans_arrs),
              processes=args['numprepproc'])

  xdnnProc = mp.Process(
      target=fpga_process,
      args=(args, len(imgid_chunks), compilerJSONObj,
            shared_trans_arrs, shared_output_arrs))

  postProc = mp.Process(
      target=post_process,
      args=(args, img_paths, fpgaOutputs, output_shapes, shared_output_arrs))
  xdnnProc.start()
  postProc.start()

  t1 = timeit.default_timer()
  if args['perpetual']:
    # Resubmit the whole image set forever, waiting for each pass to finish
    # before queueing the next one.
    while True:
      p.map_async(run_pre_process, imgid_chunks).wait()
  else:
    # Fire and forget: completion is observed through the two process joins
    # below, not through the AsyncResult.
    p.map_async(run_pre_process, imgid_chunks)

  xdnnProc.join()
  postProc.join()

  p.close()
  p.join()
  total_t = timeit.default_timer() - t1
  if args['profile']:
    print("Total time taken: {} s\n Total images: {}\nAverage FPS: {}".format(
        total_t, len(img_paths), len(img_paths) / total_t))
示例#4
0
def run(args=None):
    """Build the "yolo_v2" prep -> fpga -> post graph and drive it.

    Args:
        args: Option dictionary. When falsy, options are parsed from the
            command line and a default 'preprocseq' entry is installed.

    The 'startxstream' and 'servermode' entries are always overwritten
    (True / False) before the graph is built.
    """
    if not args:
        cli = yolo_parser_args(xdnn_io.default_parser_args())
        cli.add_argument('--startxstream', default=True, action='store_true',
                         help='automatically start obj store server')
        cli.add_argument('--servermode', default=False, action='store_true',
                         help='accept images from another process')
        cli.add_argument("--deploymodel", type=str, default='',
                         help='Original prototxt')
        cli.add_argument("--caffemodel", type=str, default='',
                         help='Original caffemodel')

        args = xdnn_io.make_dict_args(cli.parse_args())
        args['preprocseq'] = [
            ('resize', (224, 224)),
            ('meansub', [104.007, 116.669, 122.679]),
            ('chtranspose', (2, 0, 1)),
        ]

    if args['golden'] or args['visualize']:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args['results_dir'], (
            "For accuracy measurements, provide --results_dir to save the detections.")
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    args['startxstream'] = True
    args['servermode'] = False

    # Queue through which the end timestamp is read back after the run.
    args['timerQ'] = Queue()

    # Network height/width come from the first input shape in the compiler JSON.
    first_shape = next(itervalues(xdnn.CompilerJsonParser(args['netcfg']).getInputs()))
    args['net_h'] = first_shape[2]
    args['net_w'] = first_shape[3]

    # start object store
    # (make sure to 'pip install pyarrow')
    xserver = xstream.Server() if args['startxstream'] else None

    graph = grapher.Graph("yolo_v2")
    for stage, node_cls in (("prep", yolov2_pre.Node),
                            ("fpga", yolov2_fpga.Node),
                            ("post", yolov2_post.Node)):
        graph.node(stage, node_cls, args)

    for edge in (("START", None, "prep"),
                 ("prep", "prep", "fpga"),
                 ("fpga", "fpga", "post"),
                 ("DONE", "post", "fpga"),
                 ("DONE", "post", None)):
        graph.edge(*edge)

    if args['servermode']:
        print("Serving %s -> %s" % (graph._in[0], graph._out[0]))
        graph.serve()
    else:
        graph.serve(background=True)
        img_paths = xdnn_io.getFilePaths(args['images'])

        requester = mp.Process(
            target=request_process,
            args=(args, img_paths, graph._in[0], graph._out[0]))

        started = timeit.default_timer()
        requester.start()
        requester.join()
        graph.stop(kill=False)
        finished = args['timerQ'].get()
        full_time = finished - started

        args['timerQ'].close()

        print("Total time : {}s for {} images".format(full_time,
                                                      len(img_paths)))
        print("Average FPS : {} imgs/sec".format(len(img_paths) / full_time))

    # mAP calculation
    if args['golden']:
        print(flush=True)
        print("Computing mAP score  : ", flush=True)
        print("Class names are  : {} ".format(labels), flush=True)
        calc_detector_mAP(args['results_dir'], args['golden'],
                          len(labels), labels, args['prob_threshold'],
                          args['mapiouthresh'], args['points'])
        sys.stdout.flush()
示例#5
0
def main():
    """Run YOLO detection batch by batch on a single Runner.

    For every batch the images are loaded into the first input blob, the
    runner is executed, the outputs are post-processed into boxes, and the
    detections are printed and optionally saved / visualised. Average
    latencies are printed when ``--profile`` is set, and
    ``calc_detector_mAP`` is called when ``--golden`` is set.

    Raises:
        ValueError: If ``args['yolo_version']`` is neither 'v2' nor 'v3'.
    """
    parser = yolo_parser_args(xdnn_io.default_parser_args())
    args = xdnn_io.make_dict_args(parser.parse_args())

    # Setup the environment
    img_paths = xdnn_io.getFilePaths(args['images'])
    if args['golden'] or args['visualize']:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args['results_dir'], (
            "For accuracy measurements, provide --results_dir to save the detections.")
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    # Fail early on an unknown version instead of hitting an unbound
    # yolo_postproc inside the batch loop.
    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        raise ValueError(
            "Unsupported yolo_version: {!r} (expected 'v2' or 'v3')".format(
                args['yolo_version']))

    runner = Runner(args['vitis_rundir'])

    # Setup the blobs
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    batch_sz = args['batch_sz']
    if batch_sz == -1:
        # -1 means "use the leading dimension of the first input tensor".
        batch_sz = inTensors[0].dims[0]

    # fpgaBlobs[0] holds the input buffers, fpgaBlobs[1] the output buffers;
    # each buffer takes the tensor's dims with the first one replaced by
    # batch_sz.
    fpgaBlobs = [
        [
            np.empty((batch_sz,) + tuple(t.dims[d] for d in range(1, t.ndims)),
                     dtype=np.float32, order='C')
            for t in tensors
        ]
        for tensors in (inTensors, outTensors)
    ]
    fpgaInput = fpgaBlobs[0][0]

    # Setup the YOLO config
    net_h, net_w = fpgaInput.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w
    biases = bias_selector(args)

    # Setup profiling env
    prep_time = 0
    exec_time = 0
    post_time = 0

    # Start the execution
    for start in range(0, len(img_paths), batch_sz):
        pl = []
        img_shapes = []

        # Prep images. On a short final batch only the leading slots are
        # rewritten; the remaining slots keep their previous contents.
        t1 = timeit.default_timer()
        for j, p in enumerate(img_paths[start:start + batch_sz]):
            fpgaInput[j, ...], img_shape = xdnn_io.loadYoloImageBlobFromFile(
                p, net_h, net_w)
            pl.append(p)
            img_shapes.append(img_shape)
        t2 = timeit.default_timer()

        # Execute
        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        # Post Proc
        t3 = timeit.default_timer()
        boxes = yolo_postproc(fpgaBlobs[1], args, img_shapes, biases=biases)
        t4 = timeit.default_timer()

        prep_time += (t2 - t1)
        exec_time += (t3 - t2)
        post_time += (t4 - t3)

        # Report only the images actually loaded in this batch. A separate
        # index name is used so the outer batch offset is not overwritten.
        for k, path in enumerate(pl):
            print("Detected {} boxes in {}".format(len(boxes[k]), path))

        # Save the result
        if args['results_dir']:
            for k, path in enumerate(pl):
                filename = os.path.splitext(os.path.basename(path))[0]
                out_file_txt = os.path.join(args['results_dir'],
                                            filename + '.txt')
                print("Saving {} boxes to {}".format(len(boxes[k]),
                                                     out_file_txt))
                sys.stdout.flush()
                saveDetectionDarknetStyle(out_file_txt, boxes[k],
                                          img_shapes[k])
                if args['visualize']:
                    out_file_png = os.path.join(args['results_dir'],
                                                filename + '.png')
                    print("Saving result to {}".format(out_file_png))
                    sys.stdout.flush()
                    draw_boxes(path, boxes[k], labels, colors, out_file_png)

    # Profiling results (skipped when there were no images, which would
    # otherwise divide by zero)
    num_images = len(img_paths)
    if args['profile'] and num_images > 0:
        print("\nAverage Latency in ms:")
        print("  Image Prep: {0:3f}".format(prep_time * 1000.0 / num_images))
        print("  Exec: {0:3f}".format(exec_time * 1000.0 / num_images))
        print("  Post Proc: {0:3f}".format(post_time * 1000.0 / num_images))
        sys.stdout.flush()

    # mAP calculation
    if args['golden']:
        print()
        print("Computing mAP score  : ")
        print("Class names are  : {} ".format(labels))
        calc_detector_mAP(args['results_dir'], args['golden'],
                          len(labels), labels, args['prob_threshold'],
                          args['mapiouthresh'], args['points'])
        sys.stdout.flush()
示例#6
0
def main():
    """Run YOLO inference via ``yolo_gpu_inference`` and optionally score it.

    Builds the command-line parser (the YOLO options from
    ``yolo_parser_args`` plus the Caffe-specific ones declared here), runs
    the inference, prints the number of processed images and, when
    ``--golden`` is given, calls ``calc_detector_mAP``.
    """
    parser = yolo_parser_args(argparse.ArgumentParser())

    # Keyword arguments shared by every option that names an existing file.
    file_arg = dict(type=extant_file, metavar="FILE")

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = [
        ('--deploymodel', dict(
            file_arg, required=True,
            help="network definition prototxt file in case of caffe")),
        ('--caffemodel', dict(
            file_arg, required=True,
            help="network weights caffe model file in case of caffe")),
        ('--images', dict(
            file_arg, required=True, nargs='*',
            help='directory or raw image files to use as input')),
        ('--labels', dict(file_arg, help='label ID')),
        ('--golden', dict(file_arg, help='Ground truth directory')),
        ('--mean_value', dict(
            type=int, nargs=3,
            default=[0, 0, 0],  # BGR for Caffe
            help='image mean values ')),
        ('--pxscale', dict(
            type=float, default=(1.0 / 255.0), help='pix cale value')),
        ('--transpose', dict(
            type=int, nargs=3, default=[2, 0, 1],
            help="Passed to caffe.io.Transformer function set_transpose, default 2,0,1")),
        ('--channel_swap', dict(
            type=int, nargs=3, default=[2, 1, 0],
            help="Passed to caffe.io.Transformer function set_channel_swap, default 2,1,0")),
        ('--caffe_backend_path', dict(help='caffe backend')),
        ('--gpu', dict(
            type=int, default=None,
            help='GPU-ID to run Caffe inference on GPU')),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = xdnn_io.make_dict_args(parser.parse_args())

    num_images_processed = yolo_gpu_inference(
        args['caffe_backend_path'],
        args['images'],
        args['deploymodel'],
        args['caffemodel'],
        args['results_dir'],
        args['iouthresh'],
        args['scorethresh'],
        args['mean_value'],
        args['pxscale'],
        args['transpose'],
        args['channel_swap'],
        args['yolo_model'],
        args['classes'],
        args)

    print('num images processed : ', num_images_processed)

    # mAP calculation
    if not args['golden']:
        return

    labels = xdnn_io.get_labels(args['labels'])
    print()
    print("Computing mAP score  : ")
    print("Class names are  : {} ".format(labels))
    calc_detector_mAP(args['results_dir'], args['golden'],
                      len(labels), labels, args['prob_threshold'],
                      args['iouthresh'])
    sys.stdout.flush()
示例#7
0
    print("  Post Proc: {0:3f}".format(post_time * 1000.0 / len(img_paths)))
    sys.stdout.flush()

  # mAP calculation
  if(args['golden']):
    print()
    print("Computing mAP score  : ")
    print("Class names are  : {} ".format(labels))
    mAP = calc_detector_mAP(args['results_dir'], args['golden'], len(labels), labels, args['prob_threshold'], args['iouthresh'])
    sys.stdout.flush()

# Script entry point: parse the command line, then run pre-processing and
# XDNN execution as two separate processes that share a pair of queues.
if __name__ == '__main__':
  # The main() entry point is commented out; the two-process pipeline below
  # runs instead.
  #main()
  parser = xdnn_io.default_parser_args()
  
  parser = yolo_parser_args(parser)
  args = parser.parse_args()
  args = xdnn_io.make_dict_args(args)
  # Both queues are handed to both processes.
  # NOTE(review): presumably pre_process fills them and process_xdnn drains
  # them (images in q_img, image shapes in q_shape) - confirm against those
  # functions.
  q_img = mp.Queue()
  q_shape = mp.Queue()
  # Creating a process to run HW pre-processing kernel
  p_preprocess = mp.Process(target=pre_process,args=(q_img, q_shape, args))
  # Process to run XDNN
  p_xdnn = mp.Process(target=process_xdnn,args=(q_img, q_shape, args))

  # Start both processes, then block until each one has exited.
  p_preprocess.start()
  p_xdnn.start()
  p_preprocess.join()
  p_xdnn.join()

示例#8
0
def run(args=None):
    """Build the "imagenet" prep -> fpga -> post graph and drive it.

    Args:
        args: Option dictionary. When falsy, options are parsed from the
            command line and a default 'preprocseq' entry is installed.

    With args['servermode'] unset, the graph is served in the background, a
    request process is run to completion over the input images, and the graph
    is stopped. Otherwise the graph is served in the foreground.
    """
    if not args:
        cli = xdnn_io.default_parser_args()

        # Integer-valued options: (flag, default, help text).
        for flag, default, text in (
                ('--numprepproc', 1,
                 '# parallel procs to decode/quantize images'),
                ('--numstream', 6, 'number of FPGA streams'),
                ('--deviceID', 0,
                 'FPGA no. -> FPGA ID to use multiple FPGAs'),
                ('--benchmarkmode', 0,
                 'bypass pre/post processing for benchmarking')):
            cli.add_argument(flag, type=int, default=default, help=text)

        # Boolean switches, off unless given on the command line.
        for flag, text in (
                ('--startxstream', 'automatically start obj store server'),
                ('--servermode', 'accept images from another process')):
            cli.add_argument(flag, default=False, action='store_true',
                             help=text)

        args = xdnn_io.make_dict_args(cli.parse_args())
        args['preprocseq'] = [
            ('resize', (224, 224)),
            ('meansub', [104.007, 116.669, 122.679]),
            ('chtranspose', (2, 0, 1)),
        ]

    # start object store
    # (make sure to 'pip install pyarrow')
    xserver = xstream.Server() if args['startxstream'] else None

    graph = grapher.Graph("imagenet")
    for stage, node_cls in (("prep", pre.Node),
                            ("fpga", fpga.Node),
                            ("post", post.Node)):
        graph.node(stage, node_cls, args)

    for edge in (("START", None, "prep"),
                 ("prep", "prep", "fpga"),
                 ("fpga", "fpga", "post"),
                 ("DONE", "post", "fpga"),
                 ("DONE", "post", None)):
        graph.edge(*edge)

    if args['servermode']:
        print("Serving %s -> %s" % (graph._in[0], graph._out[0]))
        graph.serve()
        return

    graph.serve(background=True)
    img_paths = xdnn_io.getFilePaths(args['images'])
    requester = mp.Process(
        target=request_process,
        args=(args, img_paths, graph._in[0], graph._out[0]))
    requester.start()
    requester.join()
    graph.stop(kill=False)