def yolo_predict_keras(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a Keras YOLO model on one image and decode its raw output.

    Args:
        model: object exposing ``predict``; it is called with ``[image_data]``.
        image: input image; ``image.size`` is forwarded unchanged as the
            original image shape.
        anchors: anchor collection; a length of exactly 5 selects the YOLOv2
            decoder, any other length selects the YOLOv3 decoder.
        num_classes: number of classes handed to the post-process helper.
        model_image_size: model input size used for preprocessing and decoding.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)`` from the post-process
        helper (at most ``max_boxes=100`` is requested).
    """
    image_data = preprocess_image(image, model_image_size)
    # NOTE(review): image.size is used as-is here (not reversed) -- confirm it
    # matches the layout the post-process helpers expect.
    image_shape = image.size
    prediction = model.predict([image_data])

    # YOLOv2 use 5 anchors
    postprocess = yolo2_postprocess_np if len(anchors) == 5 else yolo3_postprocess_np
    boxes, classes, scores = postprocess(
        prediction,
        image_shape,
        anchors,
        num_classes,
        model_image_size,
        max_boxes=100,
        confidence=conf_threshold,
    )
    return boxes, classes, scores
def yolo_predict_onnx(model, image, anchors, num_classes, conf_threshold):
    """Run an ONNX YOLO model on one image and decode its raw output.

    The model input size is read from the single input tensor's 4-element
    shape, unpacked as (batch, height, width, channel).

    Args:
        model: session-like object exposing ``get_inputs()`` and ``run()``.
        image: input image; ``image.size`` is reversed to obtain the original
            image shape.
        anchors: anchor collection; exactly 5 anchors selects the YOLOv2
            decoder, otherwise the YOLOv3 decoder is used.
        num_classes: number of classes handed to the post-process helper.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.
    """
    input_tensors = list(model.get_inputs())
    # assume only 1 input tensor for image
    assert len(input_tensors) == 1, 'invalid input tensor number.'
    image_input = input_tensors[0]

    _, height, width, _ = image_input.shape
    model_image_size = (height, width)

    # prepare input image
    image_data = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    prediction = model.run(None, {image_input.name: image_data})

    if len(anchors) != 5:
        boxes, classes, scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
        return boxes, classes, scores

    # YOLOv2 use 5 anchors and have only 1 prediction
    assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
    boxes, classes, scores = yolo2_postprocess_np(
        prediction[0], image_shape, anchors, num_classes, model_image_size,
        max_boxes=100, confidence=conf_threshold)
    return boxes, classes, scores
def yolo_predict_tflite(interpreter, image, anchors, num_classes, conf_threshold):
    """Run a TFLite YOLO model on one image and decode its raw output.

    The model input size is taken from entries 1 and 2 of the first input
    detail's ``'shape'``.

    Args:
        interpreter: object exposing ``get_input_details``,
            ``get_output_details``, ``set_tensor``, ``invoke`` and
            ``get_tensor``.
        image: input image; ``image.size`` is reversed to obtain the original
            image shape.
        anchors: anchor collection; exactly 5 anchors selects the YOLOv2
            decoder, otherwise the YOLOv3 decoder is used.
        num_classes: number of classes handed to the post-process helper.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.
    """
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    first_input = input_details[0]
    input_shape = first_input['shape']
    model_image_size = (input_shape[1], input_shape[2])

    image_data = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    interpreter.set_tensor(first_input['index'], image_data)
    interpreter.invoke()

    # collect every output tensor, in the order the interpreter reports them
    prediction = [interpreter.get_tensor(detail['index']) for detail in output_details]

    if len(anchors) != 5:
        boxes, classes, scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
        return boxes, classes, scores

    # YOLOv2 use 5 anchors and have only 1 prediction
    assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
    boxes, classes, scores = yolo2_postprocess_np(
        prediction[0], image_shape, anchors, num_classes, model_image_size,
        max_boxes=100, confidence=conf_threshold)
    return boxes, classes, scores
def handle_prediction(prediction, image_file, image, image_shape, anchors, class_names, model_image_size):
    """Decode a raw prediction, print the detections and display the result.

    Args:
        prediction: sequence of raw model outputs; with 5 anchors (YOLOv2) it
            must hold exactly one element.
        image_file: name printed in the summary line.
        image: image handed to ``draw_boxes``; the drawn result is shown via
            ``Image.fromarray(...).show()``.
        image_shape: original image shape forwarded to the post-process helper.
        anchors: anchor collection; its length picks the decoder.
        class_names: indexable class names; ``len(class_names)`` is the class
            count.
        model_image_size: model input size forwarded to the post-process helper.

    Returns:
        None.
    """
    tic = time.time()
    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        decoded = yolo2_postprocess_np(
            prediction[0], image_shape, anchors, len(class_names), model_image_size)
    else:
        decoded = yolo3_postprocess_np(
            prediction, image_shape, anchors, len(class_names), model_image_size)
    boxes, classes, scores = decoded
    toc = time.time()

    elapsed_ms = (toc - tic) * 1000
    print("PostProcess time: {:.8f}ms".format(elapsed_ms))
    print('Found {} boxes for {}'.format(len(boxes), image_file))

    for (xmin, ymin, xmax, ymax), cls, score in zip(boxes, classes, scores):
        top_left = (xmin, ymin)
        bottom_right = (xmax, ymax)
        print("Class: {}, Score: {}, Box: {},{}".format(
            class_names[cls], score, top_left, bottom_right))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)
    Image.fromarray(image).show()
def handle_prediction(prediction, image_file, image, image_shape, anchors, class_names, model_input_shape, elim_grid_sense, v5_decode, output_path):
    """Decode a raw prediction, print the detections, then save or show the result.

    Args:
        prediction: sequence of raw model outputs; with 5 anchors (YOLOv2) it
            must hold exactly one element.
        image_file: source image path; its basename names the saved file.
        image: image handed to ``draw_boxes``.
        image_shape: original image shape forwarded to the post-process helper.
        anchors: anchor collection; exactly 5 anchors selects the YOLOv2 decoder.
        class_names: indexable class names; ``len(class_names)`` is the class
            count.
        model_input_shape: model input size forwarded to the post-process helper.
        elim_grid_sense: forwarded to the YOLOv2 / YOLOv3 decoders.
        v5_decode: when truthy (and not YOLOv2) the YOLOv5 decoder is used,
            always with ``elim_grid_sense=True``.
        output_path: directory for the rendered image; when falsy the image is
            shown instead of saved.

    Returns:
        None.
    """
    num_anchors = len(anchors)
    tic = time.time()
    if num_anchors == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        decoded = yolo2_postprocess_np(
            prediction[0], image_shape, anchors, len(class_names), model_input_shape,
            elim_grid_sense=elim_grid_sense)
    elif v5_decode:
        # enable "elim_grid_sense" by default
        decoded = yolo5_postprocess_np(
            prediction, image_shape, anchors, len(class_names), model_input_shape,
            elim_grid_sense=True)
    else:
        decoded = yolo3_postprocess_np(
            prediction, image_shape, anchors, len(class_names), model_input_shape,
            elim_grid_sense=elim_grid_sense)
    boxes, classes, scores = decoded
    toc = time.time()

    elapsed_ms = (toc - tic) * 1000
    print("PostProcess time: {:.8f}ms".format(elapsed_ms))
    print('Found {} boxes for {}'.format(len(boxes), image_file))

    for (xmin, ymin, xmax, ymax), cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}, Box: {},{}".format(
            class_names[cls], score, (xmin, ymin), (xmax, ymax)))

    colors = get_colors(len(class_names))
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)

    # save or show result
    if not output_path:
        Image.fromarray(image).show()
        return

    os.makedirs(output_path, exist_ok=True)
    output_file = os.path.join(output_path, os.path.basename(image_file))
    Image.fromarray(image).save(output_file)
def yolo_predict_pb(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a TF 1.x frozen-graph YOLO model on one image and decode its output.

    Args:
        model: graph object exposing ``get_tensor_by_name``; it is also passed
            to ``tf.Session(graph=...)``.
        image: input image; ``image.size`` is forwarded unchanged as the
            original image shape.
        anchors: anchor collection; its length (6, 9 or 5) selects the
            hard-coded output tensor names, and 5 selects the YOLOv2 decoder.
        num_classes: number of classes handed to the post-process helper.
        model_image_size: model input size used for preprocessing and decoding.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: if the anchor count is not 5, 6 or 9.
    """
    # NOTE: TF 1.x frozen pb graph need to specify input/output tensor name
    # so the input/output tensor names are hardcoded here, keyed by anchor count
    output_names_by_anchor_count = {
        6: ['graph/conv2d_1/BiasAdd:0', 'graph/conv2d_3/BiasAdd:0'],
        9: ['graph/conv2d_3/BiasAdd:0', 'graph/conv2d_8/BiasAdd:0', 'graph/conv2d_13/BiasAdd:0'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['graph/predict_conv/BiasAdd:0'],
    }
    output_tensor_names = output_names_by_anchor_count.get(len(anchors))
    if output_tensor_names is None:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # prepare input image
    image_data = preprocess_image(image, model_image_size)
    # NOTE(review): image.size is used as-is here (not reversed) -- confirm it
    # matches the layout the post-process helpers expect.
    image_shape = image.size

    # get input/output tensors
    image_input = model.get_tensor_by_name(input_tensor_name)
    output_tensors = [model.get_tensor_by_name(name) for name in output_tensor_names]

    with tf.Session(graph=model) as sess:
        prediction = sess.run(output_tensors, feed_dict={image_input: image_data})

    if len(anchors) != 5:
        boxes, classes, scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
        return boxes, classes, scores

    # YOLOv2 use 5 anchors and have only 1 prediction
    assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
    boxes, classes, scores = yolo2_postprocess_np(
        prediction[0], image_shape, anchors, num_classes, model_image_size,
        max_boxes=100, confidence=conf_threshold)
    return boxes, classes, scores
def predict(self, image_data, image_shape):
    """Run the wrapped model on preprocessed data and decode the detections.

    Args:
        image_data: preprocessed input passed to ``self.yolo_model.predict``.
        image_shape: original image shape forwarded to the post-process helper.

    Returns:
        Tuple ``(out_boxes, out_classes, out_scores)``.
    """
    # YOLOv2 use 5 anchors; every other anchor count goes through the YOLOv3 decoder
    postprocess = yolo2_postprocess_np if len(self.anchors) == 5 else yolo3_postprocess_np

    boxes, classes, scores = postprocess(
        self.yolo_model.predict(image_data),
        image_shape,
        self.anchors,
        len(self.class_names),
        self.model_image_size,
        max_boxes=100,
        elim_grid_sense=self.elim_grid_sense,
    )
    return boxes, classes, scores
def yolo_predict_keras(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a Keras YOLO model on one image and decode its raw output.

    Args:
        model: object exposing ``predict``; it is called with ``[image_data]``.
        image: input image; ``image.size`` is reversed to obtain the original
            image shape.
        anchors: anchor collection; exactly 5 anchors selects the YOLOv2
            decoder, otherwise the YOLOv3 decoder is used.
        num_classes: number of classes handed to the post-process helper.
        model_image_size: model input size used for preprocessing and decoding.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.
    """
    image_data = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))
    prediction = model.predict([image_data])

    decode_kwargs = {'max_boxes': 100, 'confidence': conf_threshold}
    if len(anchors) == 5:
        # YOLOv2 use 5 anchors
        decoded = yolo2_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size, **decode_kwargs)
    else:
        decoded = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size, **decode_kwargs)

    boxes, classes, scores = decoded
    return boxes, classes, scores
def predict(self, image_data, image_shape):
    """Run the wrapped model on preprocessed data and decode the detections.

    The decoder is chosen from the prefix of ``self.model_type``:

    * ``scaled_yolo4_`` / ``yolo5_``: YOLOv5 decoder, always with
      ``elim_grid_sense=True``.
    * ``yolo3_`` / ``yolo4_`` / ``tiny_yolo3_`` / ``tiny_yolo4_``: YOLOv3
      decoder with ``self.elim_grid_sense``.
    * ``yolo2_`` / ``tiny_yolo2_``: YOLOv2 decoder with
      ``self.elim_grid_sense``.

    Args:
        image_data: preprocessed input passed to ``self.yolo_model.predict``.
        image_shape: original image shape forwarded to the post-process helper.

    Returns:
        Tuple ``(out_boxes, out_classes, out_scores)``.

    Raises:
        ValueError: if ``self.model_type`` matches none of the known prefixes.
            The model is not run in that case.
    """
    model_type = self.model_type

    if model_type.startswith(('scaled_yolo4_', 'yolo5_')):
        # Scaled-YOLOv4 & YOLOv5 entrance, enable "elim_grid_sense" by default
        postprocess = yolo5_postprocess_np
        elim_grid_sense = True
    elif model_type.startswith(('yolo3_', 'yolo4_', 'tiny_yolo3_', 'tiny_yolo4_')):
        # YOLOv3 & v4 entrance
        postprocess = yolo3_postprocess_np
        elim_grid_sense = self.elim_grid_sense
    elif model_type.startswith(('yolo2_', 'tiny_yolo2_')):
        # YOLOv2 entrance
        postprocess = yolo2_postprocess_np
        elim_grid_sense = self.elim_grid_sense
    else:
        raise ValueError('Unsupported model type')

    # Single call site: every decoder receives the same arguments apart from
    # the elim_grid_sense flag selected above.
    out_boxes, out_classes, out_scores = postprocess(
        self.yolo_model.predict(image_data),
        image_shape,
        self.anchors,
        len(self.class_names),
        self.model_image_size,
        max_boxes=100,
        confidence=self.score,
        iou_threshold=self.iou,
        elim_grid_sense=elim_grid_sense,
    )
    return out_boxes, out_classes, out_scores
def yolo_predict_pb(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a TF 1.x frozen-graph YOLO model on one image and decode its output.

    The ``model_image_size`` argument is overwritten with the height and width
    read from the graph's input tensor shape, so the caller's value is not used.

    Args:
        model: graph object exposing ``get_tensor_by_name``; it is also passed
            to ``tf.Session(graph=...)``.
        image: input image; ``image.size`` is reversed to obtain the original
            image shape.
        anchors: anchor collection; its length (6, 9 or 5) selects the
            hard-coded output tensor names, and 5 selects the YOLOv2 decoder.
        num_classes: number of classes handed to the post-process helper.
        model_image_size: ignored (see above); kept for interface compatibility.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: if the anchor count is not 5, 6 or 9.
    """
    # NOTE: TF 1.x frozen pb graph need to specify input/output tensor name
    # so the input/output tensor names are hardcoded here, keyed by anchor count
    output_names_by_anchor_count = {
        6: ['graph/predict_conv_1/BiasAdd:0', 'graph/predict_conv_2/BiasAdd:0'],
        9: ['graph/predict_conv_1/BiasAdd:0', 'graph/predict_conv_2/BiasAdd:0', 'graph/predict_conv_3/BiasAdd:0'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['graph/predict_conv/BiasAdd:0'],
    }
    output_tensor_names = output_names_by_anchor_count.get(len(anchors))
    if output_tensor_names is None:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # get input/output tensors
    image_input = model.get_tensor_by_name(input_tensor_name)
    output_tensors = [model.get_tensor_by_name(name) for name in output_tensor_names]

    _, height, width, _ = image_input.shape
    model_image_size = (int(height), int(width))

    # prepare input image
    image_data = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    with tf.Session(graph=model) as sess:
        prediction = sess.run(output_tensors, feed_dict={image_input: image_data})

    # order the outputs by ascending len(output[0])
    # (presumably the feature-map height, i.e. coarsest scale first -- confirm)
    prediction.sort(key=lambda output: len(output[0]))

    if len(anchors) != 5:
        boxes, classes, scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
        return boxes, classes, scores

    # YOLOv2 use 5 anchors and have only 1 prediction
    assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
    boxes, classes, scores = yolo2_postprocess_np(
        prediction[0], image_shape, anchors, num_classes, model_image_size,
        max_boxes=100, confidence=conf_threshold)
    return boxes, classes, scores
def yolo_predict_mnn(interpreter, session, image, anchors, num_classes, conf_threshold):
    """Run an MNN YOLO model on one image and decode its raw output.

    The model input size is read from the session's input tensor. All session
    outputs are fetched, ordered by scale, converted to channel-last numpy
    arrays and handed to the YOLOv2 (5 anchors) or YOLOv3 post-process helper.

    Args:
        interpreter: MNN interpreter exposing ``getSessionInput``,
            ``runSession`` and ``getSessionOutputAll``.
        session: MNN session the interpreter runs.
        image: input image; ``image.size`` is reversed to obtain the original
            image shape.
        anchors: anchor collection; 6 and 9 anchors expect outputs at
            ``height // 32``, ``height // 16`` (and ``height // 8`` for 9),
            5 anchors expects a single output.
        num_classes: number of classes handed to the post-process helper.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: for an unsupported tensor dimension type, an output whose
            height matches no expected scale, or an anchor count other than
            5, 6 or 9.
    """
    # Imported locally so this function does not rely on file-level imports.
    from functools import reduce
    from operator import mul

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)

    # get input shape
    input_shape = input_tensor.getShape()
    input_dim_type = input_tensor.getDimensionType()
    if input_dim_type == MNN.Tensor_DimensionType_Tensorflow:
        batch, height, width, channel = input_shape
    elif input_dim_type == MNN.Tensor_DimensionType_Caffe:
        batch, channel, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    model_image_size = (height, width)

    # prepare input image
    image_data = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    # use a temp tensor to copy data
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           image_data, input_dim_type)
    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    def get_tensor_list(output_tensors):
        # transform the output tensor dict to ordered tensor list, for further postprocess
        #
        # output tensor list should be like (for YOLOv3):
        # [
        #     (name, tensor) for (13, 13, 3, num_classes+5),
        #     (name, tensor) for (26, 26, 3, num_classes+5),
        #     (name, tensor) for (52, 52, 3, num_classes+5)
        # ]
        if len(anchors) == 6:
            # Tiny YOLOv3
            strides = (32, 16)
        elif len(anchors) == 9:
            # YOLOv3
            strides = (32, 16, 8)
        elif len(anchors) == 5:
            # YOLOv2 use 5 anchors and have only 1 prediction
            assert len(output_tensors) == 1, 'YOLOv2 model should have only 1 output tensor.'
            return list(output_tensors.items())
        else:
            raise ValueError('invalid anchor number')

        # Bucket every tensor under the stride it matches and emit the buckets
        # in fixed stride order. Positional list.insert() is not safe here: an
        # index past the current end is clamped, so tensors arriving as
        # /8, /16, /32 would come out as [/32, /8, /16].
        tensors_by_stride = {stride: [] for stride in strides}
        for (output_tensor_name, output_tensor) in output_tensors.items():
            tensor_shape = output_tensor.getShape()
            dim_type = output_tensor.getDimensionType()
            if dim_type == MNN.Tensor_DimensionType_Caffe:
                tensor_height, _ = tensor_shape[2:4]
            else:
                tensor_height, _ = tensor_shape[1:3]

            for stride in strides:
                if tensor_height == height // stride:
                    tensors_by_stride[stride].append((output_tensor_name, output_tensor))
                    break
            else:
                raise ValueError('invalid tensor shape')

        return [entry for stride in strides for entry in tensors_by_stride[stride]]

    output_tensors = interpreter.getSessionOutputAll(session)
    output_tensor_list = get_tensor_list(output_tensors)

    prediction = []
    for (_, output_tensor) in output_tensor_list:
        output_shape = output_tensor.getShape()
        output_elementsize = reduce(mul, output_shape)
        output_dim_type = output_tensor.getDimensionType()

        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        # (the host tensor is built from a tuple rather than a numpy array)
        host_buffer = tuple(np.zeros(output_shape, dtype=float).reshape(output_elementsize, -1))
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                host_buffer, output_dim_type)

        output_tensor.copyToHostTensor(tmp_output)

        output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)
        # our postprocess code based on TF channel last format, so if the output format
        # doesn't match, we need to transpose
        if output_dim_type == MNN.Tensor_DimensionType_Caffe:
            output_data = output_data.transpose((0, 2, 3, 1))
        elif output_dim_type == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')

        prediction.append(output_data)

    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        pred_boxes, pred_classes, pred_scores = yolo2_postprocess_np(
            prediction[0], image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
    else:
        pred_boxes, pred_classes, pred_scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)

    return pred_boxes, pred_classes, pred_scores
def yolo_predict_mnn(interpreter, session, image, anchors, num_classes, conf_threshold):
    """Run an MNN YOLO model on one image and decode its raw output.

    Output tensors are fetched by hard-coded names selected from the anchor
    count, converted to channel-last numpy arrays and handed to the YOLOv2
    (5 anchors) or YOLOv3 post-process helper.

    Args:
        interpreter: MNN interpreter exposing ``getSessionInput``,
            ``runSession`` and ``getSessionOutput``.
        session: MNN session the interpreter runs.
        image: input image; ``image.size`` is forwarded unchanged as the
            original image shape.
        anchors: anchor collection; its length must be 5, 6 or 9.
        num_classes: number of classes handed to the post-process helper.
        conf_threshold: passed to the post-process helper as ``confidence``.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: for an anchor count other than 5, 6 or 9, or an
            unsupported input/output tensor dimension type.
    """
    from functools import reduce
    from operator import mul

    # TODO: currently MNN python API only support getting input/output tensor by default or
    # by name. so the output tensor names are hardcoded here, keyed by anchor count
    output_names_by_anchor_count = {
        6: ['conv2d_1/Conv2D', 'conv2d_3/Conv2D'],
        9: ['conv2d_3/Conv2D', 'conv2d_8/Conv2D', 'conv2d_13/Conv2D'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['predict_conv/Conv2D'],
    }
    output_tensor_names = output_names_by_anchor_count.get(len(anchors))
    if output_tensor_names is None:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)

    # get input shape
    input_shape = input_tensor.getShape()
    if input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Tensorflow:
        _, height, width, _ = input_shape
    elif input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
        _, _, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    model_image_size = (height, width)

    # prepare input image
    image_data = preprocess_image(image, model_image_size)
    # NOTE(review): image.size is used as-is here (not reversed) -- confirm it
    # matches the layout the post-process helpers expect.
    image_shape = image.size

    # use a temp tensor to copy data
    # TODO: currently MNN python binding have mem leak when creating MNN.Tensor
    # from numpy array, only from tuple is good. So we convert input image to tuple
    input_elementsize = reduce(mul, input_shape)
    flat_input = tuple(image_data.reshape(input_elementsize, -1))
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           flat_input, input_tensor.getDimensionType())

    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    def fetch_output(tensor_name):
        # Copy one named session output to host and return it channel-last.
        output_tensor = interpreter.getSessionOutput(session, tensor_name)
        output_shape = output_tensor.getShape()

        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        output_elementsize = reduce(mul, output_shape)
        host_buffer = tuple(np.zeros(output_shape, dtype=float).reshape(output_elementsize, -1))
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                host_buffer, output_tensor.getDimensionType())

        output_tensor.copyToHostTensor(tmp_output)

        output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)
        # our postprocess code based on TF channel last format, so if the output format
        # doesn't match, we need to transpose
        if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
            return output_data.transpose((0, 2, 3, 1))
        if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')
        return output_data

    prediction = [fetch_output(tensor_name) for tensor_name in output_tensor_names]

    if len(anchors) != 5:
        boxes, classes, scores = yolo3_postprocess_np(
            prediction, image_shape, anchors, num_classes, model_image_size,
            max_boxes=100, confidence=conf_threshold)
        return boxes, classes, scores

    # YOLOv2 use 5 anchors and have only 1 prediction
    assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
    boxes, classes, scores = yolo2_postprocess_np(
        prediction[0], image_shape, anchors, num_classes, model_image_size,
        max_boxes=100, confidence=conf_threshold)
    return boxes, classes, scores