def validate_yolo_model_tflite(model_path, image_file, anchors, class_names, loop_count):
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    #print(input_details)
    #print(output_details)

    # check the type of the input tensor
    if input_details[0]['dtype'] == np.float32:
        floating_model = True

    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')

    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for i in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    out_list = []
    for output_detail in output_details:
        output_data = interpreter.get_tensor(output_detail['index'])
        out_list.append(output_data)

    start = time.time()
    predictions = yolo_head(out_list, anchors, num_classes=len(class_names),
                            input_dims=(height, width))
    boxes, classes, scores = handle_predictions(predictions, confidence=0.1,
                                                iou_threshold=0.4)
    boxes = adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)
    Image.fromarray(image).show()
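# A minimal usage sketch for validate_yolo_model_tflite() above. The file paths are
# placeholders, and the anchor/class-file parsing (one class name per line; anchors as
# a single comma-separated list) is an assumption about the config format, not a
# confirmed one. It also assumes the surrounding file's imports (numpy as np, PIL.Image,
# the tflite interpreter_wrapper, etc.) are already in place.
def _demo_validate_tflite():
    with open('coco_classes.txt') as f:
        class_names = [line.strip() for line in f if line.strip()]
    with open('yolo3_anchors.txt') as f:
        anchors = np.array([float(x) for x in f.read().split(',')]).reshape(-1, 2)
    validate_yolo_model_tflite('model.tflite', 'dog.jpg', anchors, class_names, loop_count=5)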
def detect_video(self, video_path, output_path):
    video_in = cv2.VideoCapture(video_path)
    width = int(video_in.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video_in.get(cv2.CAP_PROP_FRAME_HEIGHT))
    FPS = video_in.get(cv2.CAP_PROP_FPS)

    video_out = cv2.VideoWriter()
    video_out.open(output_path, cv2.VideoWriter_fourcc(*'DIVX'), FPS, (width, height))
    # video_out.open(output_path, int(video_in.get(cv2.CAP_PROP_FOURCC)), FPS, (width, height))

    while video_in.isOpened():
        ret, data = video_in.read()
        if not ret:
            break

        # OpenCV reads frames as BGR; convert to RGB for the model
        video_array = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(video_array, mode='RGB')
        resized_image = image.resize(tuple(reversed(self.model_image_size)), Image.BICUBIC)
        image_data = np.array(resized_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        # draw result on the frame, then convert back to BGR for the video writer
        image_array = np.array(image, dtype='uint8')
        image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                                 self.class_names, self.colors)
        video_out.write(cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR))

    # the session is no longer needed once the video has been processed
    self.sess.close()
    video_in.release()
    video_out.release()
    print("Done.")
def detect_image(self, image):
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

    image_data = preprocess_image(image, self.model_image_size)
    image_shape = image.size

    start = time.time()
    out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
    end = time.time()
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    print("Inference time: {:.8f}s".format(end - start))

    # draw result on input image
    image_array = np.array(image, dtype='uint8')
    image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                             self.class_names, self.colors)
    return Image.fromarray(image_array)
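# Usage sketch for detect_image(). 'YOLO' stands for whatever wrapper class owns
# detect_image()/detect_video() in this project; the constructor arguments shown are
# hypothetical placeholders, not a confirmed API.
#
#   from PIL import Image
#   yolo = YOLO(model_path='model.h5', anchors_path='yolo3_anchors.txt',
#               classes_path='coco_classes.txt')
#   result = yolo.detect_image(Image.open('dog.jpg'))
#   result.show()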
def validate_yolo_model(model, image_file, anchors, class_names, model_image_size, loop_count):
    image = Image.open(image_file)
    image_array = np.array(image, dtype='uint8')
    image_data = preprocess_image(image, model_image_size)
    image_shape = image.size

    # predict once first to bypass the model building time
    model.predict([image_data])

    start = time.time()
    for i in range(loop_count):
        boxes, classes, scores = yolo3_postprocess_np(model.predict([image_data]),
                                                      image_shape, anchors,
                                                      len(class_names), model_image_size)
    end = time.time()
    print("Average Inference time: {:.8f}s".format((end - start) / loop_count))

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image_array = draw_boxes(image_array, boxes, classes, scores, class_names, colors)
    Image.fromarray(image_array).show()
def validate_yolo_model_mnn(model_path, image_file, anchors, class_names, loop_count):
    interpreter = MNN.Interpreter(model_path)
    session = interpreter.createSession()

    # TODO: currently the MNN python API only supports getting input/output tensors by
    # default or by name, so we need to hardcode the output tensor names here to get
    # them from the model
    if len(anchors) == 6:
        output_tensor_names = ['conv2d_1/Conv2D', 'conv2d_3/Conv2D']
    elif len(anchors) == 9:
        output_tensor_names = ['conv2d_3/Conv2D', 'conv2d_8/Conv2D', 'conv2d_13/Conv2D']
    else:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)

    # get input shape
    input_shape = input_tensor.getShape()
    if input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Tensorflow:
        batch, height, width, channel = input_shape
    elif input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
        batch, channel, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # use a temp tensor to copy data
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           image_data, input_tensor.getDimensionType())

    # predict once first to bypass the model building time
    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    start = time.time()
    for i in range(loop_count):
        input_tensor.copyFrom(tmp_input)
        interpreter.runSession(session)
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    out_list = []
    for output_tensor_name in output_tensor_names:
        output_tensor = interpreter.getSessionOutput(session, output_tensor_name)
        output_shape = output_tensor.getShape()
        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                np.zeros(output_shape, dtype=float),
                                output_tensor.getDimensionType())
        output_tensor.copyToHostTensor(tmp_output)
        #tmp_output.printTensorData()

        output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)

        # our postprocess code is based on the TF channel-last format, so if the output
        # format doesn't match, we need to transpose
        if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
            output_data = output_data.transpose((0, 2, 3, 1))
        elif output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')

        out_list.append(output_data)

    start = time.time()
    predictions = yolo3_head(out_list, anchors, num_classes=len(class_names),
                             input_dims=(height, width))
    boxes, classes, scores = yolo3_handle_predictions(predictions, confidence=0.1,
                                                      iou_threshold=0.4)
    boxes = yolo3_adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)
    Image.fromarray(image).show()
def get_prediction_class_records(model_path, annotation_records, anchors, class_names,
                                 model_image_size, conf_threshold, save_result):
    '''
    Run the YOLO model on annotation images to build a per-class prediction dict.

    The dict maps class name to [image_name, coordinate, score] records, sorted by score:
    pred_classes_records = {
        'car': [
                ['00001.jpg', '94,115,203,232', 0.98],
                ['00002.jpg', '82,64,154,128', 0.93],
                ...
               ],
        ...
    }
    '''
    # support of tflite model
    if model_path.endswith('.tflite'):
        from tensorflow.lite.python import interpreter as interpreter_wrapper
        interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
        interpreter.allocate_tensors()
    # support of MNN model
    elif model_path.endswith('.mnn'):
        interpreter = MNN.Interpreter(model_path)
        session = interpreter.createSession()
    # normal keras h5 model
    else:
        model = load_model(model_path, compile=False)

    pred_classes_records = {}
    for (image_name, gt_records) in annotation_records.items():
        image = Image.open(image_name)
        image_array = np.array(image, dtype='uint8')
        image_data = preprocess_image(image, model_image_size)
        image_shape = image.size

        if model_path.endswith('.tflite'):
            pred_boxes, pred_classes, pred_scores = yolo_predict_tflite(
                interpreter, image, anchors, len(class_names), conf_threshold)
        elif model_path.endswith('.mnn'):
            pred_boxes, pred_classes, pred_scores = yolo_predict_mnn(
                interpreter, session, image, anchors, len(class_names), conf_threshold)
        else:
            pred_boxes, pred_classes, pred_scores = yolo3_postprocess_np(
                model.predict([image_data]), image_shape, anchors, len(class_names),
                model_image_size, max_boxes=100, confidence=conf_threshold)
        print('Found {} boxes for {}'.format(len(pred_boxes), image_name))

        if save_result:
            gt_boxes, gt_classes, gt_scores = transform_gt_record(gt_records, class_names)

            result_dir = os.path.join('result', 'detection')
            touchdir(result_dir)
            colors = get_colors(class_names)
            # draw ground-truth boxes (no color, no score), then predicted boxes on top
            image_array = draw_boxes(image_array, gt_boxes, gt_classes, gt_scores,
                                     class_names, colors=None, show_score=False)
            image_array = draw_boxes(image_array, pred_boxes, pred_classes, pred_scores,
                                     class_names, colors)
            image = Image.fromarray(image_array)
            # here we handle the RGBA image
            if len(image.split()) == 4:
                r, g, b, a = image.split()
                image = Image.merge("RGB", (r, g, b))
            image.save(os.path.join(result_dir, image_name.split(os.path.sep)[-1]))

        # Nothing detected
        if pred_boxes is None or len(pred_boxes) == 0:
            continue

        for box, cls, score in zip(pred_boxes, pred_classes, pred_scores):
            pred_class_name = class_names[cls]
            xmin, ymin, xmax, ymax = box
            coordinate = "{},{},{},{}".format(xmin, ymin, xmax, ymax)

            # append or add predict class item
            if pred_class_name in pred_classes_records:
                pred_classes_records[pred_class_name].append([image_name, coordinate, score])
            else:
                pred_classes_records[pred_class_name] = [[image_name, coordinate, score]]

    # sort pred_classes_records for each class according to score
    for pred_class_list in pred_classes_records.values():
        pred_class_list.sort(key=lambda ele: ele[2], reverse=True)

    return pred_classes_records
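# Example of consuming the returned dict: a sketch that relies only on the structure
# documented in the docstring above. For each class it prints the detection count and
# the top-scoring record (the per-class lists are already sorted by score, descending).
def summarize_prediction_records(pred_classes_records):
    for class_name, records in pred_classes_records.items():
        image_name, coordinate, score = records[0]
        print('{}: {} detections, best score {:.2f} on {} at [{}]'.format(
            class_name, len(records), score, image_name, coordinate))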