Example #1
import sys
import logging as log
from time import perf_counter

from openvino.runtime import Core, get_version

# build_argparser, get_characters, CTCCodec and preprocess_input come from the
# demo's local helper modules.


def main():
    args = build_argparser().parse_args()

    # Plugin initialization
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    if 'GPU' in args.device:
        core.set_property("GPU", {"GPU_ENABLE_LOOP_UNROLLING": "NO", "CACHE_DIR": "./"})

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        raise RuntimeError("Demo supports only single input topologies")
    input_tensor_name = model.inputs[0].get_any_name()

    if args.output_blob is not None:
        output_tensor_name = args.output_blob
    else:
        if len(model.outputs) != 1:
            raise RuntimeError("Demo supports only single output topologies")
        output_tensor_name = model.outputs[0].get_any_name()

    characters = get_characters(args)
    codec = CTCCodec(characters, args.designated_characters, args.top_k)
    if len(codec.characters) != model.output(output_tensor_name).shape[2]:
        raise RuntimeError("The text recognition model does not correspond to decoding character list")

    input_batch_size, input_channel, input_height, input_width = model.inputs[0].shape

    # Read and pre-process input image (NOTE: one image only)
    preprocessing_start_time = perf_counter()
    input_image = preprocess_input(args.input, height=input_height, width=input_width)[None, :, :, :]
    preprocessing_total_time = perf_counter() - preprocessing_start_time
    if input_batch_size != input_image.shape[0]:
        raise RuntimeError("The model's input batch size should equal the input image's batch size")
    if input_channel != input_image.shape[1]:
        raise RuntimeError("The model's input channel count should equal the input image's channel count")

    # Loading model to the plugin
    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    # Start sync inference
    start_time = perf_counter()
    for _ in range(args.number_iter):
        infer_request.infer(inputs={input_tensor_name: input_image})
        preds = infer_request.get_tensor(output_tensor_name).data[:]
        result = codec.decode(preds)
        print(result)
    total_latency = ((perf_counter() - start_time) / args.number_iter + preprocessing_total_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    sys.exit()
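
Examples #1, #2, #3 and #5 all start from build_argparser(), which is not shown on this page. Below is a minimal sketch of what it presumably provides, reconstructed from the attributes the examples read (args.model, args.input, args.device, args.number_iter, args.designated_characters, args.top_k, args.output_blob, plus a character-list path for get_characters); every flag name and default here is an assumption, not the demo's actual CLI.

from argparse import ArgumentParser

def build_argparser():
    # Hypothetical reconstruction; flag names and defaults are guesses.
    parser = ArgumentParser(description='Handwritten text recognition demo')
    parser.add_argument('-m', '--model', required=True,
                        help='Path to the .xml file of the trained IR model')
    parser.add_argument('-i', '--input', required=True,
                        help='Path to the input image')
    parser.add_argument('-d', '--device', default='CPU',
                        help='Device to run inference on (CPU, GPU, ...)')
    parser.add_argument('-ni', '--number_iter', type=int, default=1,
                        help='Number of inference iterations')
    parser.add_argument('-cl', '--charlist', default='data/kondate_nakayosi_char_list.txt',
                        help='Path to the decoding character list file')
    parser.add_argument('-dc', '--designated_characters', default=None,
                        help='Path to a file with characters to prioritize during decoding')
    parser.add_argument('-tk', '--top_k', type=int, default=20,
                        help='Number of candidates kept when designated characters are set')
    parser.add_argument('-ob', '--output_blob', default=None,
                        help='Name of the output blob to decode (autodetected if omitted)')
    return parser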
Example #2
import sys
import time
import logging as log

import numpy as np
from openvino.inference_engine import IECore

# build_argparser, get_characters, CTCCodec and preprocess_input come from the
# demo's local helper modules.


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()

    # Plugin initialization
    ie = IECore()
    # Read IR
    log.info("Loading network")

    # Model path hardcoded for the author's environment
    model = 'handwritten-japanese-recognition-0001'
    model = '/Users/imamura/model_2020.2/intel/' + model + '/FP32/' + model
    net = ie.read_network(model + '.xml', model + '.bin')
    #net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    assert len(net.input_info) == 1, "Demo supports only single input topologies"
    assert len(net.outputs) == 1, "Demo supports only single output topologies"

    #log.info("Preparing input/output blobs")
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))

    characters = get_characters(args)
    codec = CTCCodec(characters, args.designated_characters, args.top_k)
    assert len(codec.characters) == net.outputs[out_blob].shape[2], \
        "The text recognition model does not match the decoding character list"

    input_batch_size, input_channel, input_height, input_width = \
        net.input_info[input_blob].input_data.shape

    # Read and pre-process input image (NOTE: one image only)
    input_image = preprocess_input(args.input,
                                   height=input_height,
                                   width=input_width)[None, :, :, :]
    assert input_batch_size == input_image.shape[0], \
        "The net's input batch size must match the input image's batch size"
    assert input_channel == input_image.shape[1], \
        "The net's input channel count must match the input image's channel count"

    # Loading model to the plugin
    #log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # Start sync inference
    #log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = []
    for i in range(args.number_iter):
        t0 = time.time()
        preds = exec_net.infer(inputs={input_blob: input_image})
        preds = preds[out_blob]
        result = codec.decode(preds)
        print(', '.join(map(str, result)))
        infer_time.append((time.time() - t0) * 1000)
    #log.info("Average throughput: {} ms".format(np.average(np.asarray(infer_time))))

    sys.exit()
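
All of the examples feed the recognizer through preprocess_input(), which is also not shown here. The following is a plausible sketch, assuming the model expects a single-channel image resized to a fixed height and padded on the right to a fixed width (the [None, :, :, :] in the callers adds the batch axis, so the function should return a CHW array with C=1); the exact resize and padding strategy is an assumption.

import cv2
import numpy as np

def preprocess_input(image, height, width):
    # Hypothetical sketch. Examples #1-#3 and #5 pass an image path;
    # Examples #4 and #6 pass an already-cropped BGR array.
    if isinstance(image, str):
        src = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
    else:
        src = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if image.ndim == 3 else image
    ratio = src.shape[1] / src.shape[0]
    tw = min(int(height * ratio), width)  # clamp so the padding below stays non-negative
    rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)
    img = rsz[None, :, :]                 # HxW -> 1xHxW (CHW, single channel)
    return np.pad(img, ((0, 0), (0, 0), (0, width - tw)), mode='edge')  # pad right edge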
Example #3
import os
import sys
import time
import logging as log

import numpy as np
from openvino.inference_engine import IECore, IENetwork

# build_argparser, get_characters, CTCCodec and preprocess_input come from the
# demo's local helper modules.


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization
    ie = IECore()
    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)  # ie.read_network() is preferred in newer releases

    assert len(net.inputs) == 1, "Demo supports only single input topologies"
    assert len(net.outputs) == 1, "Demo supports only single output topologies"

    log.info("Preparing input/output blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))

    characters = get_characters(args)
    codec = CTCCodec(characters)
    assert len(codec.characters) == net.outputs[out_blob].shape[2], \
        "The text recognition model does not match the decoding character list"

    input_batch_size, input_channel, input_height, input_width = \
        net.inputs[input_blob].shape

    # Read and pre-process input image (NOTE: one image only)
    input_image = preprocess_input(args.input,
                                   height=input_height,
                                   width=input_width)[None, :, :, :]
    assert input_batch_size == input_image.shape[0], \
        "The net's input batch size must match the input image's batch size"
    assert input_channel == input_image.shape[1], \
        "The net's input channel count must match the input image's channel count"

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # Start sync inference
    log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = []
    for i in range(args.number_iter):
        t0 = time.time()
        preds = exec_net.infer(inputs={input_blob: input_image})
        preds = preds[out_blob]
        result = codec.decode(preds)
        print(result)
        infer_time.append((time.time() - t0) * 1000)
    log.info("Average throughput: {} ms".format(
        np.average(np.asarray(infer_time))))

    sys.exit()
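
get_characters() only has to turn the character-list file into a string of characters, one per class index. A minimal sketch that covers both call styles seen on this page (parsed args in Examples #1-#3 and #5, a bare path in #4 and #6); the .charlist attribute name is an assumption carried over from the hypothetical build_argparser above.

def get_characters(source):
    # Hypothetical sketch: `source` is either the parsed args (assumed to
    # carry a .charlist path) or the path string itself (Examples #4 and #6).
    path = source if isinstance(source, str) else source.charlist
    with open(path, encoding='utf-8') as f:
        return ''.join(line.strip('\n') for line in f)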
Example #4
import os

import cv2
import numpy as np
from openvino.preprocess import PrePostProcessor, ResizeAlgorithm
from openvino.runtime import Core, Layout, Type

# Drawing helpers (clearCanvas, dispCanvas, onMouse, onTrackbarLnk, onTrackbarCls),
# postprocess, topLeftPoint, cropRotatedImage, putJapaneseText, get_characters
# and CTCCodec come from the demo's local helper modules; the g_* flags and
# _canvas_x/_canvas_y are module-level globals.


def main():
    _H = 0
    _W = 1
    _C = 2

    global g_canvas
    global g_threshold
    global g_UIState
    global g_recogFlag
    global g_clickedFlag

    # Plugin initialization
    core = Core()

    model_root = '.'

    # text-detection-0003  in: (1,768,1280,3)  out: model/link_logits_/add(1,192,320,16) model/segm_logits/add(1,192,320,2)
    model='text-detection-0003'
    model = os.path.join(model_root, 'intel', model, 'FP16', model)
    net_td = core.read_model(model+'.xml')
    ppp = PrePostProcessor(net_td)
    ppp.input().tensor().set_element_type(Type.u8).set_layout(Layout('NHWC'))
    ppp.input().preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR)
    net_td = ppp.build()
    compiled_model_td = core.compile_model(net_td, 'CPU')
    ireq_td = compiled_model_td.create_infer_request()

    # handwritten-japanese-recognition
    model = 'handwritten-japanese-recognition-0001'
    model = os.path.join(model_root, 'intel', model, 'FP16', model)
    net = core.read_model(model+'.xml')
    input_batch_size, input_channel, input_height, input_width = list(net.input(0).get_shape())
    compiled_model = core.compile_model(net, 'CPU')
    ireq = compiled_model.create_infer_request()

    characters = get_characters('data/kondate_nakayosi_char_list.txt')
    codec = CTCCodec(characters)

    clearCanvas()
    cv2.namedWindow('canvas')
    cv2.setMouseCallback('canvas', onMouse)
    cv2.createTrackbar('Link           Threshold', 'canvas', 50, 100, onTrackbarLnk)
    cv2.createTrackbar('Classification Threshold', 'canvas', 15, 100, onTrackbarCls)

    while True:
        g_UIState = 0
        while not g_recogFlag:
            key = cv2.waitKey(100)
            dispCanvas()
            if key == 27:       # ESC quits the demo
                return
            if key == ord(' '):
                break
        cv2.waitKey(1)
        g_recogFlag = False
        g_UIState = 1

        print('text detection')
        tensor = np.expand_dims(g_canvas, 0)
        res_td = ireq_td.infer({0: tensor})
        # The inference result can be accessed either by tensor name or by output index:
        link = ireq_td.get_tensor('model/link_logits_/add:0').data   # 'model/link_logits_/add'  1,192,320,16
        segm = ireq_td.get_tensor('model/segm_logits/add:0').data    # 'model/segm_logits/add'   1,192,320,2
        #link = ireq_td.get_tensor(compiled_model_td.output(1)).data   # 'model/link_logits_/add'  1,192,320,16
        #segm = ireq_td.get_tensor(compiled_model_td.output(0)).data    # 'model/segm_logits/add'   1,192,320,2
        rects = postprocess(link, segm, _canvas_x, _canvas_y, g_lnk_th/100., g_cls_th/100.)

        canvas2 = g_canvas.copy()
        for i, rect_ in enumerate(rects):
            rect = ((rect_[0], rect_[1]), (rect_[2], rect_[3]), rect_[4])
            box = cv2.boxPoints(rect).astype(np.int32)
            cv2.polylines(canvas2, [box], True, (255,0,0), 4)

            most_left_idx, most_left = topLeftPoint(box)
            crop = cropRotatedImage(g_canvas, box, most_left_idx)
            input_image = preprocess_input(crop, input_height, input_width)[None,:,:,:]

            res = ireq.infer({0: input_image})
            preds = ireq.get_tensor(compiled_model.output(0)).data
            result = codec.decode(preds)
            print('OCR result ({}): {}'.format(i, result))
            
            canvas2 = putJapaneseText(canvas2, most_left[0], most_left[1], result[0])
            cv2.imshow('canvas', canvas2)
            cv2.waitKey(1)

        cv2.putText(canvas2, 'Hit any key, tap screen or click L-button to continue', (0, 40), cv2.FONT_HERSHEY_PLAIN, 2, (0,0,0), 2)
        cv2.imshow('canvas', canvas2)
        g_clickedFlag = False
        key = -1
        while not g_clickedFlag and key == -1:
            key = cv2.waitKey(100)

    return
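
All six examples hand the raw network output to CTCCodec.decode(). The shape check against shape[2] in the other examples implies an output layout of (time_steps, batch, num_classes), and CTC decoders conventionally reserve index 0 for the blank token; under those assumptions, the core of the decoder is a greedy collapse along these lines (a sketch, not the demo's actual implementation):

import numpy as np

def ctc_greedy_decode(preds, characters, blank_idx=0):
    # preds: (time_steps, batch, num_classes) logits from the recognition model.
    seq = np.argmax(preds, axis=2)          # best class per time step
    texts = []
    for b in range(seq.shape[1]):
        prev = blank_idx
        chars = []
        for idx in seq[:, b]:
            # collapse repeated labels, then drop blanks
            if idx != prev and idx != blank_idx:
                chars.append(characters[idx])
            prev = idx
        texts.append(''.join(chars))
    return texts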
Example #5

import os
import sys
import logging as log
from time import perf_counter

from openvino.inference_engine import IECore, get_version

# build_argparser, get_characters, CTCCodec and preprocess_input come from the
# demo's local helper modules.


def main():
    args = build_argparser().parse_args()

    # Plugin initialization
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = IECore()
    if 'GPU' in args.device:  # guard mirrors Example #1: setting GPU config without a GPU plugin raises
        ie.set_config(config={"GPU_ENABLE_LOOP_UNROLLING": "NO",
                              "CACHE_DIR": "./"},
                      device_name="GPU")

    # Read IR
    log.info('Reading model {}'.format(args.model))
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    assert len(net.input_info) == 1, "Demo supports only single input topologies"
    input_blob = next(iter(net.input_info))

    if args.output_blob is not None:
        out_blob = args.output_blob
    else:
        assert len(net.outputs) == 1, "Demo supports only single output topologies"
        out_blob = next(iter(net.outputs))

    characters = get_characters(args)
    codec = CTCCodec(characters, args.designated_characters, args.top_k)
    assert len(codec.characters) == net.outputs[out_blob].shape[2], \
        "The text recognition model does not match the decoding character list"

    input_batch_size, input_channel, input_height, input_width = \
        net.input_info[input_blob].input_data.shape

    # Read and pre-process input image (NOTE: one image only)
    preprocessing_start_time = perf_counter()
    input_image = preprocess_input(args.input,
                                   height=input_height,
                                   width=input_width)[None, :, :, :]
    preprocessing_total_time = perf_counter() - preprocessing_start_time
    assert input_batch_size == input_image.shape[0], \
        "The net's input batch size must match the input image's batch size"
    assert input_channel == input_image.shape[1], \
        "The net's input channel count must match the input image's channel count"

    # Loading model to the plugin
    exec_net = ie.load_network(network=net, device_name=args.device)
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    # Start sync inference
    start_time = perf_counter()
    for i in range(args.number_iter):
        preds = exec_net.infer(inputs={input_blob: input_image})
        preds = preds[out_blob]
        result = codec.decode(preds)
        print(result)
    total_latency = ((perf_counter() - start_time) / args.number_iter +
                     preprocessing_total_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    sys.exit()
Example #6
import cv2
import numpy as np
from openvino.inference_engine import IECore

# Drawing helpers (clearCanvas, dispCanvas, onMouse, onTrackbar),
# text_detection_postprocess, topLeftPoint, cropRotatedImage, putJapaneseText,
# get_characters and CTCCodec come from the demo's local helper modules;
# the g_* flags and _canvas_x/_canvas_y are module-level globals.


def main():
    _H = 0
    _W = 1
    _C = 2

    global g_canvas
    global g_threshold
    global g_UIState
    global g_recogFlag
    global g_clickedFlag

    # Plugin initialization
    ie = IECore()

    # text-detection-0003  in: (1,3,768,1280)  out: model/link_logits_/add(1,16,192,320) model/segm_logits/add(1,2,192,320)
    model='text-detection-0003'
    model = './intel/'+model+'/FP16/'+model
    net_td = ie.read_network(model+'.xml', model+'.bin')
    input_blob_td = next(iter(net_td.inputs))
    out_blob_td   = next(iter(net_td.outputs))
    exec_net_td = ie.load_network(net_td, 'CPU')

    # handwritten-japanese-recognition
    model = 'handwritten-japanese-recognition-0001'
    model = './intel/'+model+'/FP16/'+model
    net = ie.read_network(model+'.xml', model+'.bin')
    input_blob = next(iter(net.inputs))
    out_blob   = next(iter(net.outputs))
    input_batch_size, input_channel, input_height, input_width = net.inputs[input_blob].shape
    exec_net = ie.load_network(net, 'CPU')

    characters = get_characters('data/kondate_nakayosi_char_list.txt')
    codec = CTCCodec(characters)

    clearCanvas()
    cv2.namedWindow('canvas')
    cv2.setMouseCallback('canvas', onMouse)
    cv2.createTrackbar('Threshold', 'canvas', 50, 100, onTrackbar)

    while True:
        g_UIState = 0
        while not g_recogFlag:
            dispCanvas()
            key = cv2.waitKey(100)
            if key == 27:       # ESC quits the demo
                return
            if key == ord(' '):
                break
        g_recogFlag = False
        g_UIState = 1

        print('text detection')
        img = cv2.resize(g_canvas, (_canvas_x, _canvas_y))
        img = img.transpose((_C, _H, _W))
        img = img.reshape((1, 3, _canvas_y, _canvas_x))
        res_td = exec_net_td.infer(inputs={input_blob_td: img})
        link = res_td['model/link_logits_/add']     # 1,16,192,320
        segm = res_td['model/segm_logits/add' ]     # 1, 2,192,320
        rects = text_detection_postprocess(link, segm, (_canvas_x, _canvas_y), g_threshold/100., g_threshold/100.)
        print('text detection - completed')

        canvas2 = g_canvas.copy()
        for i, rect in enumerate(rects):
            box = cv2.boxPoints(rect).astype(np.int32)
            cv2.polylines(canvas2, [box], True, (255,0,0), 4)

            most_left_idx, most_left = topLeftPoint(box)
            crop = cropRotatedImage(g_canvas, box, most_left_idx)
            input_image = preprocess_input(crop, input_height, input_width)[None,:,:,:]

            preds = exec_net.infer(inputs={input_blob: input_image})
            preds = preds[out_blob]
            result = codec.decode(preds)
            print('OCR result ({}): {}'.format(i, result))
            
            canvas2 = putJapaneseText(canvas2, most_left[0], most_left[1], result[0])
            cv2.imshow('canvas', canvas2)
            cv2.waitKey(1)

        cv2.putText(canvas2, 'Hit any key, tap screen or click L-button to continue', (0, 40), cv2.FONT_HERSHEY_PLAIN, 2, (0,0,0), 2)
        cv2.imshow('canvas', canvas2)
        g_clickedFlag = False
        key = -1
        while not g_clickedFlag and key == -1:
            key = cv2.waitKey(100)

    return
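
Examples #4 and #6 rely on topLeftPoint() and cropRotatedImage() to rectify each detected box before recognition. A simplified sketch of what they presumably do, assuming the four cv2.boxPoints() corners are ordered clockwise starting from the returned top-left index; the real helpers in the demo may handle corner ordering differently.

import cv2
import numpy as np

def topLeftPoint(points):
    # Hypothetical simplification: the corner closest to the image origin.
    idx = int(np.argmin([float(x) + float(y) for x, y in points]))
    return idx, points[idx]

def cropRotatedImage(image, box, top_left_idx):
    # Rotate the point order so the top-left corner comes first, then
    # rectify the rotated rectangle with a perspective warp.
    pts = np.roll(box, -top_left_idx, axis=0).astype(np.float32)
    w = int(np.linalg.norm(pts[1] - pts[0]))   # top edge length
    h = int(np.linalg.norm(pts[3] - pts[0]))   # left edge length
    dst = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    m = cv2.getPerspectiveTransform(pts, dst)
    return cv2.warpPerspective(image, m, (w, h))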