def validate_yolo_model_tflite(model_path, image_file, anchors, class_names, loop_count):
    """Run a TFLite YOLO model on one image, print timings and show the detections.

    The interpreter is invoked once as a warm-up and then ``loop_count`` more
    times to report an average inference time. The output tensors of the last
    run are post-processed into boxes, which are printed and drawn on the image.

    Args:
        model_path: path of the TFLite model file given to the interpreter.
        image_file: path of the image to run detection on.
        anchors: anchor definitions forwarded unchanged to ``yolo_head``.
        class_names: sequence of class names; its length is used as the
            number of classes and it is indexed by the predicted class ids.
        loop_count: number of timed inference runs; must be at least 1.

    Raises:
        ValueError: if ``loop_count`` is smaller than 1 (the average time
            could not be computed).
    """
    # Fail fast: a non-positive loop count would otherwise only surface as a
    # ZeroDivisionError after the model has already been loaded and run.
    if loop_count < 1:
        raise ValueError('loop_count must be at least 1, got {}'.format(loop_count))

    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')

    # model input size is read from axes 1 and 2 of the first input tensor shape
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for _ in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 /loop_count))

    # collect every output tensor of the last invocation
    out_list = [interpreter.get_tensor(output_detail['index'])
                for output_detail in output_details]

    start = time.time()
    predictions = yolo_head(out_list, anchors, num_classes=len(class_names), input_dims=(height, width))

    boxes, classes, scores = handle_predictions(predictions, confidence=0.1, iou_threshold=0.4)
    boxes = adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))

    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)

    Image.fromarray(image).show()
예제 #2
0
    def detect_video(self, video_path, output_path):
        """Run detection on every frame of a video and write the annotated result.

        Frames are read from ``video_path``, resized to ``self.model_image_size``,
        scaled to [0, 1], run through ``self.sess`` and written to
        ``output_path`` with the same frame size and FPS as the input.

        Args:
            video_path: path of the input video passed to ``cv2.VideoCapture``.
            output_path: path of the output video opened by ``cv2.VideoWriter``.

        Note:
            ``self.sess`` is closed once all frames have been processed, so the
            detector's session is not usable after a successful call.
        """
        video_in = cv2.VideoCapture(video_path)
        video_out = cv2.VideoWriter()
        try:
            width = int(video_in.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(video_in.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = video_in.get(cv2.CAP_PROP_FPS)

            # The output codec is hard-coded to DIVX; the source codec could be
            # reused instead via int(video_in.get(cv2.CAP_PROP_FOURCC)).
            video_out.open(output_path, cv2.VideoWriter_fourcc(*'DIVX'), fps,
                           (width, height))

            while video_in.isOpened():
                ret, data = video_in.read()
                if not ret:
                    break

                # OpenCV delivers BGR frames; the PIL image is built from RGB data
                video_array = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(video_array, mode='RGB')
                resized_image = image.resize(
                    tuple(reversed(self.model_image_size)), Image.BICUBIC)
                image_data = np.array(resized_image, dtype='float32')

                image_data /= 255.
                image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

                out_boxes, out_scores, out_classes = self.sess.run(
                    [self.boxes, self.scores, self.classes],
                    feed_dict={
                        self.yolo_model.input: image_data,
                        self.input_image_shape: [image.size[1], image.size[0]],
                        K.learning_phase(): 0
                    })
                # NOTE(review): the return value is ignored, so draw_boxes is
                # presumably drawing on `image` in place; the (scores, boxes,
                # classes) argument order is kept as found - confirm both
                # against the draw_boxes used by this class.
                draw_boxes(image, out_scores, out_boxes, out_classes,
                           self.class_names, self.colors)
                video_out.write(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))

            self.sess.close()
        finally:
            # always release the capture/writer handles, even if a frame fails
            video_in.release()
            video_out.release()
        print("Done.")
예제 #3
0
    def detect_image(self, image):
        """Detect objects in a PIL image and return a copy with the boxes drawn.

        Args:
            image: input image; it is passed to ``preprocess_image``, its
                ``size`` attribute is forwarded to ``self.predict`` and it is
                converted to a uint8 array for drawing.

        Returns:
            A new ``Image`` built from the array returned by ``draw_boxes``.
        """
        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

        image_data = preprocess_image(image, self.model_image_size)
        image_shape = image.size

        start = time.time()
        out_boxes, out_classes, out_scores = self.predict(
            image_data, image_shape)
        # stop the clock right after prediction so console output is not
        # counted as inference time
        end = time.time()
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        print("Inference time: {:.8f}s".format(end - start))

        #draw result on input image
        image_array = np.array(image, dtype='uint8')
        image_array = draw_boxes(image_array, out_boxes, out_classes,
                                 out_scores, self.class_names, self.colors)
        return Image.fromarray(image_array)
def validate_yolo_model(model, image_file, anchors, class_names, model_image_size, loop_count):
    """Run a loaded YOLO model on one image, print the detections and show them.

    The model is called once as a warm-up, then prediction plus
    ``yolo3_postprocess_np`` is repeated ``loop_count`` times and the average
    time per iteration is printed. The detections of the last iteration are
    printed and drawn on the image.

    Args:
        model: object exposing ``predict``; it is called with ``[image_data]``.
        image_file: path of the image to run detection on.
        anchors: anchor definitions forwarded to ``yolo3_postprocess_np``.
        class_names: sequence of class names; its length is used as the
            number of classes and it is indexed by the predicted class ids.
        model_image_size: input size forwarded to ``preprocess_image`` and
            ``yolo3_postprocess_np``.
        loop_count: number of timed iterations; must be at least 1.

    Raises:
        ValueError: if ``loop_count`` is smaller than 1 (no detections would
            be produced and the average time could not be computed).
    """
    # Without at least one iteration `boxes`/`classes`/`scores` are never
    # bound and the average below divides by zero, so reject it up front.
    if loop_count < 1:
        raise ValueError('loop_count must be at least 1, got {}'.format(loop_count))

    image = Image.open(image_file)
    image_array = np.array(image, dtype='uint8')
    image_data = preprocess_image(image, model_image_size)
    image_shape = image.size

    # predict once first to bypass the model building time
    model.predict([image_data])

    start = time.time()
    for _ in range(loop_count):
        boxes, classes, scores = yolo3_postprocess_np(model.predict([image_data]), image_shape, anchors, len(class_names), model_image_size)
    end = time.time()

    print('Found {} boxes for {}'.format(len(boxes), image_file))

    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image_array = draw_boxes(image_array, boxes, classes, scores, class_names, colors)
    # the timed loop covers prediction and post-processing together
    print("Average Inference time: {:.8f}s".format((end - start)/loop_count))

    Image.fromarray(image_array).show()
def validate_yolo_model_mnn(model_path, image_file, anchors, class_names,
                            loop_count):
    """Run an MNN YOLO model on one image, print timings and show the detections.

    The session is run once as a warm-up and then ``loop_count`` more times to
    report an average inference time. The named output tensors of the last
    run are copied to the host, post-processed into boxes, printed and drawn
    on the image.

    Args:
        model_path: path of the MNN model file given to ``MNN.Interpreter``.
        image_file: path of the image to run detection on.
        anchors: anchor definitions; their count (6 or 9) selects the
            hard-coded output tensor names.
        class_names: sequence of class names; its length is used as the
            number of classes and it is indexed by the predicted class ids.
        loop_count: number of timed inference runs; must be at least 1.

    Raises:
        ValueError: if ``loop_count`` is smaller than 1, if the number of
            anchors is neither 6 nor 9, or if an input/output tensor uses an
            unsupported dimension type.
    """
    # Fail fast: a non-positive loop count would otherwise only surface as a
    # ZeroDivisionError after the session has already been created and run.
    if loop_count < 1:
        raise ValueError('loop_count must be at least 1, got {}'.format(loop_count))

    interpreter = MNN.Interpreter(model_path)
    session = interpreter.createSession()

    # TODO: currently MNN python API only support getting input/output tensor by default or
    # by name. so we need to hardcode the output tensor names here to get them from model
    if len(anchors) == 6:
        output_tensor_names = ['conv2d_1/Conv2D', 'conv2d_3/Conv2D']
    elif len(anchors) == 9:
        output_tensor_names = [
            'conv2d_3/Conv2D', 'conv2d_8/Conv2D', 'conv2d_13/Conv2D'
        ]
    else:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)
    # get input shape
    input_shape = input_tensor.getShape()
    input_dim_type = input_tensor.getDimensionType()
    if input_dim_type == MNN.Tensor_DimensionType_Tensorflow:
        batch, height, width, channel = input_shape
    elif input_dim_type == MNN.Tensor_DimensionType_Caffe:
        batch, channel, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # use a temp tensor to copy data
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           image_data, input_dim_type)

    # predict once first to bypass the model building time
    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    start = time.time()
    for _ in range(loop_count):
        input_tensor.copyFrom(tmp_input)
        interpreter.runSession(session)
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    out_list = []
    for output_tensor_name in output_tensor_names:
        output_tensor = interpreter.getSessionOutput(session,
                                                     output_tensor_name)
        output_shape = output_tensor.getShape()
        output_dim_type = output_tensor.getDimensionType()

        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                np.zeros(output_shape, dtype=float),
                                output_dim_type)

        output_tensor.copyToHostTensor(tmp_output)

        output_data = np.array(tmp_output.getData(),
                               dtype=float).reshape(output_shape)
        # our postprocess code based on TF channel last format, so if the output format
        # doesn't match, we need to transpose
        if output_dim_type == MNN.Tensor_DimensionType_Caffe:
            output_data = output_data.transpose((0, 2, 3, 1))
        elif output_dim_type == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')

        out_list.append(output_data)

    start = time.time()
    predictions = yolo3_head(out_list,
                             anchors,
                             num_classes=len(class_names),
                             input_dims=(height, width))

    boxes, classes, scores = yolo3_handle_predictions(predictions,
                                                      confidence=0.1,
                                                      iou_threshold=0.4)
    boxes = yolo3_adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))

    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)

    Image.fromarray(image).show()
예제 #6
0
def get_prediction_class_records(model_path, annotation_records, anchors,
                                 class_names, model_image_size, conf_threshold,
                                 save_result):
    '''
    Do the predict with YOLO model on annotation images to get predict class dict

    The model type is chosen from the suffix of model_path: '.tflite' uses the
    TFLite interpreter, '.mnn' uses MNN, anything else is loaded with
    load_model as a Keras model.

    When save_result is true, each image is also saved under result/detection
    with the ground truth boxes and the predicted boxes drawn on it.

    predict class dict would contain image_name, coordinate and score, and
    sorted by score:
    pred_classes_records = {
        'car': [
                ['00001.jpg','94,115,203,232',0.98],
                ['00002.jpg','82,64,154,128',0.93],
                ...
               ],
        ...
    }
    '''
    # decide the backend once instead of re-testing the suffix for every image
    is_tflite = model_path.endswith('.tflite')
    is_mnn = model_path.endswith('.mnn')

    # support of tflite model
    if is_tflite:
        from tensorflow.lite.python import interpreter as interpreter_wrapper
        interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
        interpreter.allocate_tensors()
    # support of MNN model
    elif is_mnn:
        interpreter = MNN.Interpreter(model_path)
        session = interpreter.createSession()
    # normal keras h5 model
    else:
        model = load_model(model_path, compile=False)

    if save_result:
        # output location and palette do not depend on the image
        result_dir = os.path.join('result', 'detection')
        touchdir(result_dir)
        colors = get_colors(class_names)

    pred_classes_records = {}
    for image_name, gt_records in annotation_records.items():
        image = Image.open(image_name)

        if is_tflite:
            pred_boxes, pred_classes, pred_scores = yolo_predict_tflite(
                interpreter, image, anchors, len(class_names), conf_threshold)
        elif is_mnn:
            pred_boxes, pred_classes, pred_scores = yolo_predict_mnn(
                interpreter, session, image, anchors, len(class_names),
                conf_threshold)
        else:
            # only the keras path consumes the preprocessed array
            image_data = preprocess_image(image, model_image_size)
            pred_boxes, pred_classes, pred_scores = yolo3_postprocess_np(
                model.predict([image_data]),
                image.size,
                anchors,
                len(class_names),
                model_image_size,
                max_boxes=100,
                confidence=conf_threshold)

        # tolerate a None result here as well, matching the check further down
        num_boxes = 0 if pred_boxes is None else len(pred_boxes)
        print('Found {} boxes for {}'.format(num_boxes, image_name))

        if save_result:
            gt_boxes, gt_classes, gt_scores = transform_gt_record(
                gt_records, class_names)

            image_array = np.array(image, dtype='uint8')
            # ground truth first (no colors, no score), predictions on top
            image_array = draw_boxes(image_array,
                                     gt_boxes,
                                     gt_classes,
                                     gt_scores,
                                     class_names,
                                     colors=None,
                                     show_score=False)
            image_array = draw_boxes(image_array, pred_boxes, pred_classes,
                                     pred_scores, class_names, colors)
            result_image = Image.fromarray(image_array)
            # here we handle the RGBA image
            bands = result_image.split()
            if len(bands) == 4:
                result_image = Image.merge("RGB", bands[:3])
            result_image.save(
                os.path.join(result_dir, os.path.basename(image_name)))

        # Nothing detected
        if num_boxes == 0:
            continue

        for box, cls, score in zip(pred_boxes, pred_classes, pred_scores):
            pred_class_name = class_names[cls]
            xmin, ymin, xmax, ymax = box
            coordinate = "{},{},{},{}".format(xmin, ymin, xmax, ymax)

            #append or add predict class item
            pred_classes_records.setdefault(pred_class_name, []).append(
                [image_name, coordinate, score])

    # sort pred_classes_records for each class according to score
    for pred_class_list in pred_classes_records.values():
        pred_class_list.sort(key=lambda ele: ele[2], reverse=True)

    return pred_classes_records