def thread_job(model_name, input_filename, num_inferences, task_type, device):
    """Runs a classification or detection job on one Python thread.

    Args:
      model_name: model file name, resolved through
        `test_utils.test_data_path`.
      input_filename: test image name, opened through `test_utils.test_image`.
      num_inferences: number of times the interpreter is invoked.
      task_type: either 'classification' or 'detection'.
      device: device specifier forwarded to `make_interpreter`.

    Raises:
      ValueError: if `task_type` is neither 'classification' nor 'detection'.
    """
    # Validate first so that a bad task type fails before an interpreter is
    # created or the test image is opened.
    if task_type not in ('classification', 'detection'):
        raise ValueError(
            'task_type should be classification or detection, but is given %s' %
            task_type)
    is_classification = task_type == 'classification'

    tid = threading.get_ident()
    logging.info('Thread: %d, # inferences: %d, model: %s', tid, num_inferences,
                 model_name)

    interpreter = make_interpreter(test_utils.test_data_path(model_name), device)
    interpreter.allocate_tensors()
    with test_utils.test_image(input_filename) as img:
        if is_classification:
            resize_image = img.resize(common.input_size(interpreter),
                                      Image.NEAREST)
            common.set_input(interpreter, resize_image)
        else:
            common.set_resized_input(
                interpreter, img.size,
                lambda size: img.resize(size, Image.NEAREST))

        # The input tensor is loaded once; every iteration re-runs inference
        # on it and reads the results back.
        for _ in range(num_inferences):
            interpreter.invoke()
            if is_classification:
                classify.get_classes(interpreter)
            else:
                detect.get_objects(interpreter)
    logging.info('Thread: %d, model: %s done', tid, model_name)
def detect(self, image_shape, image_np, detections: List[Detection]):
    """Runs one detection pass and copies the results into `detections`.

    The frame is resized to the model shape, fed to the interpreter, and the
    returned objects are written into the caller-supplied `detections`
    entries, pairwise and in order, until either sequence runs out.

    Args:
      image_shape: shape of the source image; elements 1 and 0 are read as the
        two image extents.
      image_np: image array handed to `cv2.resize`.
      detections: pre-allocated detection records that are overwritten.

    Returns:
      Elapsed time in milliseconds covering input upload, inference and
      result extraction.
    """
    resized = cv2.resize(image_np, dsize=self.__model_shape,
                         interpolation=cv2.INTER_LINEAR)

    # Largest coordinate allowed on each axis: (shape[1] - 1, shape[0] - 1).
    extents = itemgetter(1, 0)(image_shape)
    limits = np.subtract(extents, (1, 1))
    image_scale = np.divide(self.__model_shape, limits)

    started_at = time()
    common.set_input(self.__interpreter, resized)
    self.__interpreter.invoke()
    objs = detect.get_objects(self.__interpreter, image_scale=image_scale)
    inference_time = (time() - started_at) * 1000

    for detection, obj in zip(detections, objs):
        box = obj.bbox
        detection.label = obj.id + 1
        detection.confidence = obj.score
        # Coordinates are only capped from above; no lower bound is applied.
        detection.bounding_box.y_min = min(box.ymin, limits[1])
        detection.bounding_box.x_min = min(box.xmin, limits[0])
        detection.bounding_box.y_max = min(box.ymax, limits[1])
        detection.bounding_box.x_max = min(box.xmax, limits[0])
    return inference_time
def classify_image(model_file, image_file, image_quantization=None):
    """Classifies one test image and returns the top-scoring result.

    Args:
      model_file: string, model file name.
      image_file: string, image file name.
      image_quantization: (scale: float, zero_point: float), assumed image
        quantization parameters. Only used for integer-input models.

    Returns:
      The first entry returned by `classify.get_classes`.
    """
    interpreter = make_interpreter(test_data_path(model_file))
    interpreter.allocate_tensors()

    image = test_image(image_file, common.input_size(interpreter))
    input_type = common.input_details(interpreter, 'dtype')
    float_input = np.issubdtype(input_type, np.floating)
    integer_input = np.issubdtype(input_type, np.integer)

    if float_input:
        # This preprocessing is specific to MobileNet V1 with floating point
        # input.
        image = (input_type(image) - 127.5) / 127.5
    if integer_input and image_quantization:
        model_quantization = common.input_details(interpreter, 'quantization')
        image = rescale_image(image, image_quantization, model_quantization,
                              input_type)

    common.set_input(interpreter, image)
    interpreter.invoke()
    ranked = classify.get_classes(interpreter)
    return ranked[0]
def predict(self, picData):
    """Runs object detection on `picData` and returns the detected objects.

    Args:
      picData: image data passed to `common.set_input`; its `shape` is
        printed.

    Returns:
      The result of `detect.get_objects`, called with `self.minObjectScore`
      as its second argument.
    """
    print("\nPredicting image on TPU")
    print('Shape of data: ', picData.shape)

    # Load the input tensor, run the network, then read the detections back.
    engine = self.interpreter
    common.set_input(engine, picData)
    engine.invoke()
    return detect.get_objects(engine, self.minObjectScore)
def main():
    """Pulses a GPIO line forever and prints how long each pulse took.

    The model is loaded and the input image is written to the interpreter, but
    inference itself is intentionally disabled in this script: each loop
    iteration only raises the trigger line, sleeps 0.5 ms and lowers it again.
    The loop never ends on its own; stop it with Ctrl-C.

    `--top_k`, `--threshold` and `--count` are still accepted so existing
    command lines keep working, but they are unused while classification is
    disabled.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', required=True,
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels', help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    # Still read so that a bad --labels path fails at start-up as before.
    labels = read_label_file(args.labels) if args.labels else {}  # noqa: F841

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()

    _, height, width = interpreter.get_input_details()[0]['shape']
    # PIL expects the size as (width, height).
    size = (width, height)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = Image.open(args.input).resize(size, Image.LANCZOS)
    common.set_input(interpreter, image)

    trigger = GPIO("/dev/gpiochip2", 13, "out")  # pin 37

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    try:
        while True:
            start = time.perf_counter()
            trigger.write(True)
            time.sleep(0.0005)
            trigger.write(False)
            inference_time = time.perf_counter() - start
            print('%.1fms' % (inference_time * 1000))
    finally:
        # Release the GPIO line however the loop is left (e.g. Ctrl-C).
        trigger.close()
    # The former results printout followed the endless loop and read a
    # `classes` variable that was never assigned; it could not run and was
    # removed.
def main():
    """Classifies frames from a live video stream until 'q' or Ctrl-C.

    Each frame is resized to the model input size, classified (top 1, filtered
    by `--threshold`) and drawn through `draw_image`. When the loop ends the
    elapsed time and approximate FPS are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--labels', help='File path of label file.',
                        required=True)
    parser.add_argument('--picamera', action='store_true',
                        help="Use PiCamera for image capture", default=False)
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Classification score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    size = common.input_size(interpreter)

    # Initialize video stream
    vs = VideoStream(usePiCamera=args.picamera, resolution=(640, 480)).start()
    time.sleep(1)

    fps = FPS().start()
    try:
        while True:
            # Read frame from video
            screenshot = vs.read()
            image = Image.fromarray(screenshot)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter.
            image_pred = image.resize(size, Image.LANCZOS)
            common.set_input(interpreter, image_pred)
            interpreter.invoke()
            classes = classify.get_classes(interpreter, 1, args.threshold)
            draw_image(image, classes, labels)

            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
            fps.update()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the loop.
        pass
    finally:
        # Stop the counter and release the camera and windows even when an
        # unexpected error escapes the loop.
        fps.stop()
        cv2.destroyAllWindows()
        vs.stop()

    print("Elapsed time: " + str(fps.elapsed()))
    print("Approx FPS: :" + str(fps.fps()))
    time.sleep(2)
def object_frame(inputQueue, outputQueue):
    """Worker loop: pulls frames from `inputQueue`, pushes detections out.

    The interpreter is chosen from the module-level `tpu` and `cust` flags.
    On the non-TPU path the frame is normalised to [-1, 1] float32 and the
    first three output tensors are forwarded as `[boxes, classes, scores]`.
    On the TPU path results come from `detect.get_objects`.

    Args:
      inputQueue: queue of frames to process.
      outputQueue: queue receiving one result list per processed frame.
    """
    if not tpu:
        interpreter = tflite.Interpreter(
            model_path=TFLITE_PATH + '/model.tflite')
    elif cust:
        interpreter = make_interpreter(TFLITE_PATH + '/detect_edgetpu.tflite')
    else:
        interpreter = make_interpreter(
            TFLITE_PATH +
            '/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite')

    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # keep looping
    while True:
        data_out = []
        # Nothing to do until a frame is waiting in the input queue.
        if inputQueue.empty():
            continue
        img = inputQueue.get()

        if not tpu:
            batch = np.expand_dims(img, axis=0)
            batch = np.asarray(batch / 127.5 - 1, dtype=np.float32)
            interpreter.set_tensor(input_details[0]['index'], batch)
            interpreter.invoke()

            boxes, classe, score = (
                interpreter.get_tensor(output_details[k]['index'])[0]
                for k in range(3))
            data_out = [boxes, classe, score]
        else:
            common.set_input(interpreter, img)
            interpreter.invoke()
            objects = detect.get_objects(interpreter, confThreshold, (1, 1))
            # NOTE(review): every object overwrites `data_out`, so only the
            # last detection is forwarded - confirm that is intended.
            for obj in objects:
                xmin, ymin, xmax, ymax = (int(obj.bbox[k]) for k in range(4))
                data_out = [[[ymin, xmin, ymax, xmax]], obj.id, obj.score]

        outputQueue.put(data_out)
def faceinference(interpreter, face, labels, frame, person,i):
    """Classifies `face` and saves `frame` when the prediction is `person`.

    Args:
      interpreter: interpreter used for classification.
      face: image passed to `common.set_input`.
      labels: mapping from class id to label; the id itself is used when a
        label is missing.
      frame: image written to disk on a match.
      person: label string to look for among the predictions.
      i: value used to build the output file name `pics/<i>.jpg`.
    """
    common.set_input(interpreter, face)
    interpreter.invoke()

    # Top-1 result with a zero score threshold.
    top_classes = classify.get_classes(interpreter, 1, 0.0)
    pred = [str(labels.get(entry.id, entry.id)) for entry in top_classes]
    print(pred)

    if person in pred:
        target = 'pics/' + str(i) + '.jpg'
        cv2.imwrite(target, frame)
def classification_job(classification_model, image_name, num_inferences):
    """Loads one image into a classifier on device ':0' and runs it repeatedly.

    Args:
      classification_model: model path handed to `make_interpreter`.
      image_name: image path handed to `open_image`.
      num_inferences: number of invoke / read-back rounds.
    """
    interpreter = make_interpreter(classification_model, device=':0')
    interpreter.allocate_tensors()
    input_size = common.input_size(interpreter)

    # The input tensor is written once, before any inference runs.
    with open_image(image_name) as image:
        resized = image.resize(input_size, Image.NEAREST)
        common.set_input(interpreter, resized)

    for _round in range(num_inferences):
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=1)
def main():
    """Runs semantic segmentation on one image and saves a side-by-side view.

    The output image shows the resized input on the left and the colourised
    segmentation mask on the right.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True,
                        help='Path of the segmentation model.')
    parser.add_argument('--input', required=True,
                        help='File path of the input image.')
    parser.add_argument('--output', default='semantic_segmentation_result.jpg',
                        help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio',
        action='store_true',
        default=False,
        help=(
            'keep the image aspect ratio when down-sampling the image by adding '
            'black pixel padding (zeros) on bottom or right. '
            'By default the image is resized and reshaped without cropping. This '
            'option should be the same as what is applied on input images during '
            'model training. Otherwise the accuracy may be affected and the '
            'bounding box of detection result may be stretched.'))
    args = parser.parse_args()

    interpreter = make_interpreter(args.model, device=':0')
    interpreter.allocate_tensors()
    width, height = common.input_size(interpreter)

    img = Image.open(args.input)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    if args.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            interpreter, img.size,
            lambda size: img.resize(size, Image.LANCZOS))
    else:
        resized_img = img.resize((width, height), Image.LANCZOS)
        common.set_input(interpreter, resized_img)

    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    # Paste directly to the right of the resized image. Using the model width
    # here left a gap and clipped the mask whenever the resized image was
    # narrower than the model input.
    output_img.paste(mask_img, (new_width, 0))
    output_img.save(args.output)
    print('Done. Results saved at', args.output)
def process(self, channel, rgbd):
    """Detects artifacts in one RGBD frame and publishes their positions.

    Args:
      channel: key into `self.camera_params` selecting the camera intrinsics.
      rgbd: tuple `(robot_pose, camera_pose, img_data, compressed_depth)`,
        where `img_data` is an encoded image readable by PIL.

    For every detection of a supported category whose score reaches that
    category's threshold and whose bounding box holds enough valid depth
    pixels, the method publishes `localized_artf` followed by `debug_rgbd`.
    Nothing is published when the detector returns no objects.
    """
    robot_pose, camera_pose, img_data, compressed_depth = rgbd
    img = Image.open(BytesIO(img_data)).convert('RGB')
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    input_img = img.resize(self.input_size, Image.LANCZOS)
    scale = (input_img.width / float(img.width),
             input_img.height / float(img.height))

    coral_common.set_input(self.interpreter, input_img)
    self.interpreter.invoke()
    detections = coral_detection.get_objects(
        self.interpreter, self.min_threshold, image_scale=scale)
    if not detections:
        return

    depth = decompress(compressed_depth)
    camera_params = self.camera_params[channel]

    for detection in detections:
        category = self.categories.get(detection.id)
        if category is None:
            # This is one of the unsupported categories, such as 'robot' or
            # 'nothing'.
            continue

        threshold = self.thresholds[category]
        if detection.score >= threshold:
            xmin = np.clip(detection.bbox.xmin, 0, img.width)
            xmax = np.clip(detection.bbox.xmax, 0, img.width)
            ymin = np.clip(detection.bbox.ymin, 0, img.height)
            ymax = np.clip(detection.bbox.ymax, 0, img.height)

            # Keep only depth readings inside [min_depth, max_depth); the
            # boolean mask selects the same pixels as a per-pixel filter.
            region = depth[ymin:ymax, xmin:xmax].reshape((-1))
            patch = region[(region >= self.min_depth) &
                           (region < self.max_depth)]
            if len(patch) < self.min_valid_depth_pixels:
                continue

            d = np.median(patch)
            u = (xmin + xmax) / 2
            v = (ymin + ymax) / 2

            # Location of the artifact relative to the camera.
            # NOTE(review): u is derived from the x extent but is paired with
            # 'cy' (and v with 'cx') - confirm against how camera_params is
            # populated.
            x = d
            y = d * (camera_params['cy'] - u) / camera_params['fx']
            z = d * (camera_params['cx'] - v) / camera_params['fy']

            # Coordinate of the artifact relative to the robot.
            robot_rel = transform([x, y, z], camera_pose)
            # Global coordinate of the artifact.
            world_xyz = transform(robot_rel, robot_pose)

            ign_name = NAME2IGN[category]
            if self.verbose:
                print(ign_name, world_xyz, detection.score)
            self.publish('localized_artf', [ign_name, world_xyz])
            # Making sure the output depth is compressed. Input depth may or
            # may not be.
            self.publish('debug_rgbd',
                         [robot_pose, camera_pose, img_data, compress(depth)])
def main():
    """Classifies random 28x28 images forever, pulsing a GPIO line per run.

    Each iteration builds a random greyscale image, loads it into the
    interpreter, raises the trigger line for the duration of `invoke()` and
    prints the measured time followed by the classification results. The loop
    never ends on its own; stop it with Ctrl-C.

    `--input` and `--count` are accepted but unused by this script.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', required=True,
                        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', help='Image to be classified.')
    parser.add_argument('-l', '--labels', help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    labels = read_label_file(args.labels) if args.labels else {}

    interpreter = make_interpreter(*args.model.split('@'))
    interpreter.allocate_tensors()

    _, height, width = interpreter.get_input_details()[0]['shape']
    # PIL expects the size as (width, height).
    size = (width, height)

    trigger = GPIO("/dev/gpiochip2", 13, "out")  # pin 37

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    try:
        while True:
            arr = numpy.random.randint(0, 255, (28, 28), dtype='uint8')
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter.
            image = Image.fromarray(arr, 'L').resize(size, Image.LANCZOS)
            common.set_input(interpreter, image)

            # The trigger line is high exactly while inference is running.
            start = time.perf_counter()
            trigger.write(True)
            interpreter.invoke()
            trigger.write(False)
            inference_time = time.perf_counter() - start
            print('%.6fms' % (inference_time * 1000))

            classes = classify.get_classes(interpreter, args.top_k,
                                           args.threshold)
            print('RESULTS for image ', 1)
            for c in classes:
                print('%s: %.6f' % (labels.get(c.id, c.id), c.score))
    finally:
        # Release the GPIO line however the loop is left (e.g. Ctrl-C).
        trigger.close()
def segment_image(model_file, image_file, mask_file):
    """Segments a test image and scores the result against a reference mask.

    Args:
      model_file: model file name, resolved through `test_data_path`.
      image_file: input image file name, resolved through `test_data_path`.
      mask_file: reference mask file name, resolved through `test_data_path`.

    Returns:
      The value of `array_iou(result, reference)`.
    """
    interpreter = make_interpreter(test_data_path(model_file))
    interpreter.allocate_tensors()

    # Context managers close the underlying files once the pixel data has
    # been copied out. Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
    # the same filter.
    with Image.open(test_data_path(image_file)) as source:
        image = source.resize(common.input_size(interpreter), Image.LANCZOS)
    common.set_input(interpreter, image)
    interpreter.invoke()

    result = segment.get_output(interpreter)
    if len(result.shape) > 2:
        # Collapse the last axis to the index of its largest value.
        result = np.argmax(result, axis=2)

    with Image.open(test_data_path(mask_file)) as mask:
        reference = np.asarray(mask)
    return array_iou(result, reference)
def getimagedata(message):
    """Decodes a base64 image message, classifies it and emits the result.

    Args:
      message: string containing base64 image data. Everything before the
        first '/9' is discarded before decoding.

    For each returned class the string '<label> <score>' is printed and
    emitted as a 'predresult' event. Uses the module-level `interpreter` and
    `labels`.
    """
    payload = bytes(message, encoding='utf-8')
    # NOTE(review): if '/9' is absent, find() returns -1 and only the last
    # byte is kept, so decoding fails - confirm callers always send it.
    payload = payload[payload.find(b'/9'):]

    pimage = Image.open(io.BytesIO(base64.b64decode(payload)))
    pimage = cv2.cvtColor(np.array(pimage), cv2.COLOR_RGB2BGR)
    pimage = detectface(pimage)
    pimage = cv2.resize(pimage, (224, 224))
    pimage = cv2.flip(pimage, 1)

    common.set_input(interpreter, pimage)
    interpreter.invoke()
    classes = classify.get_classes(interpreter, 1, 0.0)

    # Report inside the loop so `pred` is never read before it has been
    # assigned when no class is returned.
    for class1 in classes:
        pred = str(labels.get(class1.id, class1.id)) + " " + str(class1.score)
        print(pred)
        emit('predresult', pred)
def run_benchmark(model):
    """Measures inference time of an imprinted model trained on random data.

    Ten categories of twenty random images each are generated with a fixed
    seed. After each category has been trained, the engine is serialized into
    a fresh interpreter and one inference is run on the category's first
    image; the time for that step is accumulated.

    Args:
      model: string, file name of the input model.

    Returns:
      float, accumulated time in ms for the timed steps.
    """
    engine = ImprintingEngine(test_utils.test_data_path(model),
                              keep_classes=False)

    extractor = make_interpreter(engine.serialize_extractor_model(),
                                 device=':0')
    extractor.allocate_tensors()
    width, height = common.input_size(extractor)

    np.random.seed(12345)

    # 10 Categories, each has 20 images.
    num_categories, images_per_category = 10, 20
    data_by_category = collections.defaultdict(list)
    for category in range(num_categories):
        bucket = data_by_category[category]
        for _ in range(images_per_category):
            bucket.append(
                np.random.randint(0, 256, (height, width, 3), dtype=np.uint8))

    delegate = load_edgetpu_delegate({'device': ':0'})

    inference_time = 0.
    for class_id, tensors in enumerate(data_by_category.values()):
        for sample in tensors:
            common.set_input(extractor, sample)
            extractor.invoke()
            engine.train(classify.get_scores(extractor), class_id=class_id)

        # Timed section: build an interpreter from the current model and run
        # a single inference on it.
        tick = time.perf_counter()
        interpreter = tflite.Interpreter(
            model_content=engine.serialize_model(),
            experimental_delegates=[delegate])
        interpreter.allocate_tensors()
        common.set_input(interpreter, tensors[0])
        interpreter.invoke()
        classify.get_classes(interpreter, top_k=3)
        inference_time += (time.perf_counter() - tick) * 1000

    print('Model: %s' % model)
    print('Inference time: %.2fms' % inference_time)
    return inference_time
def main():
    """Classifies one image `--count` times and prints timings and results.

    The classes printed at the end are those of the last inference run; when
    `--count` is zero or negative no inference runs and no class is printed.
    """
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-m', '--model',
        default='mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite',
        help='File path of .tflite file.')
    parser.add_argument('-i', '--input', default='parrot.jpg',
                        help='Image to be classified.')
    parser.add_argument('-l', '--labels', default='inat_bird_labels.txt',
                        help='File path of labels file.')
    parser.add_argument('-k', '--top_k', type=int, default=1,
                        help='Max number of classification results')
    parser.add_argument('-t', '--threshold', type=float, default=0.0,
                        help='Classification score threshold')
    parser.add_argument('-c', '--count', type=int, default=5,
                        help='Number of times to run inference')
    args = parser.parse_args()

    # Load the labels.
    labels = read_label_file(args.labels) if args.labels else {}

    # Load the model. The model file differs depending on whether Coral is
    # used.
    interpreter = make_interpreter(*args.model.split('@'))
    # Allocate memory for inference; must run right after loading the model.
    interpreter.allocate_tensors()

    size = common.input_size(interpreter)
    # Convert the input file to RGB and resize it to the interpreter's input
    # size. Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
    # filter.
    image = Image.open(args.input).convert('RGB').resize(size, Image.LANCZOS)
    # Load the image into the interpreter.
    common.set_input(interpreter, image)

    print('----INFERENCE TIME----')
    print('Note: The first inference on Edge TPU is slow because it includes',
          'loading the model into Edge TPU memory.')
    # Pre-set so the results loop below is safe when --count is 0.
    classes = []
    # Repeat inference the requested number of times.
    for _ in range(args.count):
        start = time.perf_counter()  # start timing
        interpreter.invoke()  # inference
        inference_time = time.perf_counter() - start  # stop timing
        # Take the top args.top_k labels scoring at least args.threshold.
        classes = classify.get_classes(interpreter, args.top_k, args.threshold)
        print('%.1fms' % (inference_time * 1000))  # show inference time

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %.5f' % (labels.get(c.id, c.id), c.score))
def get_classes(self, frame, top_k=1, threshold=0.0):
    """Classifies `frame` and returns the ranked classes.

    The frame is resized to the interpreter's input size with bicubic
    interpolation before inference.

    Args:
      frame: The bitmap image to pass through the model.
      top_k: The number of top results to return.
      threshold: The minimum confidence score for returned results.

    Returns:
      A list of `Class` objects representing the classification results,
      ordered by scores. See
      https://coral.ai/docs/reference/py/pycoral.adapters/#pycoral.adapters.classify.Class
    """
    engine = self.interpreter
    target_size = common.input_size(engine)
    scaled = cv2.resize(frame, target_size, fx=0, fy=0,
                        interpolation=cv2.INTER_CUBIC)
    common.set_input(engine, scaled)
    engine.invoke()
    return classify.get_classes(engine, top_k, threshold)
def image():
    """Classifies the request image and returns the mouse / nomouse scores.

    Only one inference runs at a time: while the module-level `running` flag
    is set, further requests are answered with HTTP 429 and an empty JSON
    object.

    Returns:
      A `Response` whose JSON body is
      `{'tags': {'mouse': <float>, 'nomouse': <float>}}`; a score stays 0.0
      when its label is not among the top three results.
    """
    global running

    if running:
        return Response(response="{}", status=429,
                        mimetype="application/json")

    running = True
    try:
        # Run an inference
        interpreter.allocate_tensors()
        size = common.input_size(interpreter)
        # NOTE(review): request.data is handed to Image.open as-is; confirm
        # clients send something PIL can open directly.
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
        # filter.
        image = Image.open(request.data).convert('RGB').resize(
            size, Image.LANCZOS)
        common.set_input(interpreter, image)
        interpreter.invoke()
        classes = classify.get_classes(interpreter, top_k=3)
    finally:
        # Always clear the busy flag; otherwise one failed request would make
        # every later request return 429.
        running = False

    nomouseValue = 0
    mouseValue = 0
    for c in classes:
        label = labels.get(c.id, c.id)
        if label == "nomouse":
            nomouseValue = c.score
        if label == "mouse":
            mouseValue = c.score

    # build a response dict to send back to client
    response = {
        'tags': {
            'mouse': float(mouseValue),
            'nomouse': float(nomouseValue)
        }
    }
    # encode response using jsonpickle
    response_pickled = jsonpickle.encode(response)

    return Response(response=response_pickled, status=200,
                    mimetype="application/json")
def classification_task(num_inferences):
    """Classifies 'cat.bmp' repeatedly on device ':0' and checks the label.

    Every round must return exactly one result, labelled 'Egyptian cat'.
    Relies on `self` from the enclosing scope for the assertions.

    Args:
      num_inferences: number of classification rounds to run.
    """
    tid = threading.get_ident()
    print('Thread: %d, %d inferences for classification task' %
          (tid, num_inferences))

    labels = read_label_file(test_utils.test_data_path('imagenet_labels.txt'))
    model_path = test_utils.test_data_path(
        'mobilenet_v1_1.0_224_quant_edgetpu.tflite')
    interpreter = make_interpreter(model_path, device=':0')
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)
    print('Thread: %d, using device 0' % tid)

    with test_utils.test_image('cat.bmp') as img:
        for _round in range(num_inferences):
            # The image is resized and loaded again on every round.
            resized = img.resize(size, Image.NEAREST)
            common.set_input(interpreter, resized)
            interpreter.invoke()
            ret = classify.get_classes(interpreter, top_k=1)
            self.assertEqual(len(ret), 1)
            self.assertEqual(labels[ret[0].id], 'Egyptian cat')
    print('Thread: %d, done classification task' % tid)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Alternates a classification and a detection model on one Edge TPU.

    The classification model runs `batch_size` times, then the detection model
    runs `batch_size` times, and the pair repeats for
    ceil(num_inferences / batch_size) rounds.

    Args:
      classification_model: string, path to classification model
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, indicates how many inferences to run one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job, including interpreter
      creation and input loading.
    """
    began = time.perf_counter()

    classifier = make_interpreter(classification_model, device=':0')
    classifier.allocate_tensors()
    detector = make_interpreter(detection_model, device=':0')
    detector.allocate_tensors()

    # Both input tensors are loaded once, up front.
    with open_image(image_name) as image:
        classifier_size = common.input_size(classifier)
        common.set_input(classifier,
                         image.resize(classifier_size, Image.NEAREST))
        _, detector_scale = common.set_resized_input(
            detector, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    # Ceiling division so a partial final batch still gets a full round.
    rounds = (num_inferences + batch_size - 1) // batch_size
    for _round in range(rounds):
        for _step in range(batch_size):
            classifier.invoke()
            classify.get_classes(classifier, top_k=1)
        for _step in range(batch_size):
            detector.invoke()
            detect.get_objects(detector, score_threshold=0.,
                               image_scale=detector_scale)
    return time.perf_counter() - began
def extract_embeddings(image_paths, interpreter):
    """Turns each image into a feature embedding using the given model.

    Images are opened one at a time, resized to the model input size and fed
    to the interpreter; only the resulting scores are kept.

    Args:
      image_paths: ndarray, represents a list of image paths.
      interpreter: TFLite interpreter, wraps embedding extractor model.

    Returns:
      float32 ndarray of shape (len(image_paths), feature_dim) holding one
      embedding per row, where feature_dim is `classify.num_classes`.
    """
    input_size = common.input_size(interpreter)
    feature_dim = classify.num_classes(interpreter)
    embeddings = np.empty((len(image_paths), feature_dim), dtype=np.float32)

    for row, image_path in enumerate(image_paths):
        with test_image(image_path) as img:
            resized = img.resize(input_size, Image.NEAREST)
            common.set_input(interpreter, resized)
            interpreter.invoke()
            embeddings[row, :] = classify.get_scores(interpreter)

    return embeddings
def run_benchmark(model):
    """Measures imprinting training time for a model using random data.

    Ten categories of twenty random images each are generated with a fixed
    seed. The timed section covers embedding extraction, training on every
    image and one final model serialization.

    Args:
      model: string, file name of the input model.

    Returns:
      float, training time in ms.
    """
    engine = ImprintingEngine(test_utils.test_data_path(model),
                              keep_classes=False)

    extractor = make_interpreter(engine.serialize_extractor_model())
    extractor.allocate_tensors()
    width, height = common.input_size(extractor)

    np.random.seed(12345)

    # 10 Categories, each has 20 images.
    num_categories, images_per_category = 10, 20
    data_by_category = collections.defaultdict(list)
    for category in range(num_categories):
        bucket = data_by_category[category]
        for _ in range(images_per_category):
            bucket.append(
                np.random.randint(0, 256, (height, width, 3), dtype=np.uint8))

    tick = time.perf_counter()
    for class_id, tensors in enumerate(data_by_category.values()):
        for sample in tensors:
            common.set_input(extractor, sample)
            extractor.invoke()
            engine.train(classify.get_scores(extractor), class_id=class_id)
    # Serialization is part of the measured training time.
    engine.serialize_model()
    training_time = (time.perf_counter() - tick) * 1000

    print('Model: %s' % model)
    print('Training time: %.2fms' % training_time)
    return training_time
def predict():
    """Handles an image classification request.

    For a POST request carrying an "image" file, the image is classified with
    the interpreter stored in `HOLDER` and the JSON response lists the
    predictions and the inference time. In every other case the response is
    `{"success": false}`.

    Returns:
      The JSON response built by `flask.jsonify`.
    """
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter.
            image = Image.open(image_file).convert('RGB').resize(
                HOLDER['size'], Image.LANCZOS)

            params = common.input_details(HOLDER['interpreter'],
                                          'quantization_parameters')
            scale = params['scales']
            zero_point = params['zero_points']
            mean = 128.0
            std = 128.0
            if abs(scale * std - 1) < 1e-5 and abs(mean - zero_point) < 1e-5:
                # Input data does not require preprocessing.
                common.set_input(HOLDER['interpreter'], image)
            else:
                # Input data requires preprocessing: map pixels into the
                # model's quantized input range and clamp to uint8.
                normalized_input = (np.asarray(image) - mean) / (
                    std * scale) + zero_point
                np.clip(normalized_input, 0, 255, out=normalized_input)
                common.set_input(HOLDER['interpreter'],
                                 normalized_input.astype(np.uint8))

            start = time.perf_counter()
            HOLDER['interpreter'].invoke()
            inference_time = time.perf_counter() - start
            classes = classify.get_classes(HOLDER['interpreter'],
                                           HOLDER['top_k'], 0.0)

            if classes:
                data["success"] = True
                data["inference-time"] = '%.2f ms' % (inference_time * 1000)
                data["predictions"] = [
                    {
                        "score": float(c.score),
                        "label": HOLDER['labels'].get(c.id, c.id)
                    }
                    for c in classes
                ]

    return flask.jsonify(data)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Alternates detection and classification on one Edge TPU, with timings.

    Each round runs the detection model `batch_size` times, then runs
    `batch_size` classification steps; every classification step performs
    three back-to-back inferences and is timed as one unit.

    Args:
      classification_model: path to the classification model.
      detection_model: path to the detection model.
      image_name: path to the input image.
      num_inferences: number of inferences per model; rounded up to whole
        batches.
      batch_size: number of steps one model runs before switching.

    Returns:
      Tuple `(total_time, identification, classification)`: total wall time in
      seconds, then the per-step detection and classification durations.
    """
    began = time.perf_counter()

    classifier = make_interpreter(classification_model, device=':0')
    classifier.allocate_tensors()
    detector = make_interpreter(detection_model, device=':0')
    detector.allocate_tensors()

    identification = []
    classification = []

    # Both input tensors are loaded once, up front.
    with open_image(image_name) as image:
        classifier_size = common.input_size(classifier)
        common.set_input(classifier,
                         image.resize(classifier_size, Image.NEAREST))
        _, detector_scale = common.set_resized_input(
            detector, image.size,
            lambda size: image.resize(size, Image.NEAREST))

    # Ceiling division so a partial final batch still gets a full round.
    rounds = (num_inferences + batch_size - 1) // batch_size
    for _round in tqdm(range(rounds)):
        for _step in range(batch_size):
            tick = time.perf_counter()
            detector.invoke()
            detect.get_objects(detector, score_threshold=0.,
                               image_scale=detector_scale)
            identification.append(time.perf_counter() - tick)

        for _step in range(batch_size):
            tick = time.perf_counter()
            # Three classifications per timed step; the results are discarded.
            for _repeat in range(3):
                classifier.invoke()
                classify.get_classes(classifier, top_k=4)
            classification.append(time.perf_counter() - tick)

    total_time = time.perf_counter() - began
    return total_time, identification, classification
def predict():
    """Handles an object detection request.

    For a POST request carrying an "image" file, the image is converted to
    RGB, resized to the model input size and run through the module-level
    `interpreter`. Objects scoring at least 0.4 are returned with their label
    and bounding box.

    Returns:
      The JSON response built by `flask.jsonify`; `{"success": false}` when
      there is no image or nothing was detected.
    """
    data = {"success": False}

    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            image_file = flask.request.files["image"]
            image_bytes = image_file.read()
            image = Image.open(io.BytesIO(image_bytes))
            size = common.input_size(interpreter)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter.
            image = image.convert("RGB").resize(size, Image.LANCZOS)

            # Load the input BEFORE invoking. Previously the tensor was set,
            # inference ran, and set_resized_input was then called afterwards,
            # re-writing the input only to obtain `scale`.
            # NOTE(review): `image` already has the model input size here, so
            # the returned scale is expected to be 1 and boxes stay in
            # resized-image coordinates, as before - confirm.
            _, scale = common.set_resized_input(
                interpreter, image.size,
                lambda size: image.resize(size, Image.LANCZOS))
            interpreter.invoke()

            threshold = 0.4
            objs = detect.get_objects(interpreter, threshold, scale)

            if objs:
                data["success"] = True
                data["predictions"] = [
                    {
                        "confidence": float(obj.score),
                        "label": labels[obj.id],
                        "y_min": int(obj.bbox[1]),
                        "x_min": int(obj.bbox[0]),
                        "y_max": int(obj.bbox[3]),
                        "x_max": int(obj.bbox[2]),
                    }
                    for obj in objs
                ]

    # return the data dictionary as a JSON response
    return flask.jsonify(data)
def train(capture_dir, labels, model, out_model):
    """Imprints a model from captured images and writes it to `out_model`.

    Args:
      capture_dir: directory holding one sub-directory per class name.
      labels: mapping from class id to class name; classes are processed in
        ascending id order.
      model: base model handed to `ImprintingEngine`.
      out_model: path the serialized trained model is written to.
    """
    engine = ImprintingEngine(model, keep_classes=False)
    extractor = make_interpreter(engine.serialize_extractor_model(),
                                 device=':0')
    extractor.allocate_tensors()

    for class_id in sorted(labels):
        class_name = labels[class_id]
        print('\nClass: %s (id=%d)' % (class_name, class_id))
        class_capture_dir = os.path.join(capture_dir, class_name)

        for filename in os.listdir(class_capture_dir):
            imgpath = os.path.join(class_capture_dir, filename)
            pixels = read_image(imgpath, common.input_size(extractor))
            common.set_input(extractor, pixels)
            extractor.invoke()

            # The extractor's scores serve as the embedding for this image.
            embedding = classify.get_scores(extractor)
            print('  %s => %s' % (imgpath, embedding))
            engine.train(embedding, class_id)

    serialized = engine.serialize_model()
    with open(out_model, 'wb') as f:
        f.write(serialized)
    print('\nTrained model was saved to %s' % out_model)
def callback(self, data):
    """Segments one incoming image message and displays the result.

    The message is converted to an image, run through `self.interpreter`, and
    a side-by-side view (resized input on the left, colourised mask on the
    right) is scaled back to twice the original width and shown in an OpenCV
    window.

    Args:
      data: image message accepted by `self.bridge.imgmsg_to_cv2`.
    """
    # NOTE(review): the frame is requested as "bgr8" and wrapped by PIL
    # without a channel swap - confirm the model expects this channel order.
    cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
    img = Image.fromarray(cv_image)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    if self.keep_aspect_ratio:
        resized_img, _ = common.set_resized_input(
            self.interpreter, img.size,
            lambda size: img.resize(size, Image.LANCZOS))
    else:
        resized_img = img.resize(
            (self.model_input_width, self.model_input_height), Image.LANCZOS)
        # Use the instance's interpreter; the bare name `interpreter` is not
        # defined in this method.
        common.set_input(self.interpreter, resized_img)

    self.interpreter.invoke()

    result = segment.get_output(self.interpreter)
    if len(result.shape) == 3:
        result = np.argmax(result, axis=-1)

    # If keep_aspect_ratio, we need to remove the padding area.
    new_width, new_height = resized_img.size
    result = result[:new_height, :new_width]
    mask_img = Image.fromarray(
        self.label_to_color_image(result).astype(np.uint8))

    # Concat resized input image and processed segmentation results.
    output_img = Image.new('RGB', (2 * new_width, new_height))
    output_img.paste(resized_img, (0, 0))
    # Paste directly to the right of the resized image. Using the model width
    # here left a gap and clipped the mask whenever the resized image was
    # narrower than the model input.
    output_img.paste(mask_img, (new_width, 0))

    original_width, original_height = img.size
    recovered_cvimg = np.array(
        output_img.resize((2 * original_width, original_height),
                          Image.LANCZOS))
    cv2.imshow("resizedimg", recovered_cvimg)
    cv2.waitKey(3)
def _transfer_learn_and_evaluate(self, model_path, keep_classes, dataset_path,
                                 test_ratio, top_k_range):
    """Imprints new classes from a dataset and measures top-k accuracy.

    Args:
      model_path: string, path of the base model.
      keep_classes: bool, whether to keep base model classes.
      dataset_path: string, path to the dataset directory. Images are expected
        under one sub-directory per category.
      test_ratio: float, the ratio of images used for test.
      top_k_range: int, largest k to evaluate; accuracies for top 1 through
        top k are returned.

    Returns:
      list of float, one accuracy per k from 1 to top_k_range.
    """
    engine = ImprintingEngine(model_path, keep_classes)
    extractor = make_interpreter(engine.serialize_extractor_model())
    extractor.allocate_tensors()
    num_classes = engine.num_classes

    print('--------------- Parsing dataset ----------------')
    print('Dataset path:', dataset_path)

    # Per the original note: train in fixed order to ensure the same
    # evaluation result.
    train_set, test_set = test_utils.prepare_data_set_from_directory(
        dataset_path, test_ratio, True)

    print('Image list successfully parsed! Number of Categories = ',
          len(train_set))
    print('--------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')

    # New classes are numbered after the classes the engine already holds.
    train_input = []
    labels_map = {}
    for new_id, (category, image_list) in enumerate(
            train_set.items(), start=num_classes):
        print('Processing {} ({} images)'.format(category, len(image_list)))
        image_paths = [
            os.path.join(dataset_path, category, image)
            for image in image_list
        ]
        train_input.append(image_paths)
        labels_map[new_id] = category

    # train
    print('---------------- Start training -----------------')
    extractor_size = common.input_size(extractor)
    for new_id, image_paths in enumerate(train_input, start=num_classes):
        for image_path in image_paths:
            with test_image(image_path) as img:
                common.set_input(
                    extractor, img.resize(extractor_size, Image.NEAREST))
                extractor.invoke()
                engine.train(classify.get_scores(extractor), class_id=new_id)
    print('---------------- Training finished -----------------')

    with test_utils.temporary_file(suffix='.tflite') as output_model_path:
        output_model_path.write(engine.serialize_model())

        # Evaluate
        print('---------------- Start evaluating -----------------')
        classifier = make_interpreter(output_model_path.name)
        classifier.allocate_tensors()
        classifier_size = common.input_size(classifier)

        # top_k_correct_count[i] counts images whose correct label was found
        # at rank i; it is turned into a running total further down.
        top_k_correct_count = [0] * top_k_range
        image_num = 0
        for category, image_list in test_set.items():
            print('Evaluating {} ({} images)'.format(category,
                                                     len(image_list)))
            for image_name in image_list:
                image_path = os.path.join(dataset_path, category, image_name)
                with test_image(image_path) as img:
                    common.set_input(
                        classifier, img.resize(classifier_size, Image.NEAREST))
                    classifier.invoke()
                    # Only top_k is passed; no score threshold is given here.
                    candidates = classify.get_classes(
                        classifier, top_k=top_k_range)
                    for rank, candidate in enumerate(candidates):
                        known = candidate.id in labels_map
                        if known and labels_map[candidate.id] == category:
                            top_k_correct_count[rank] += 1
                            break
            image_num += len(image_list)

        # Make the counts cumulative: a hit at rank i also counts for every
        # larger k.
        for k in range(1, top_k_range):
            top_k_correct_count[k] += top_k_correct_count[k - 1]

    return [hits / image_num for hits in top_k_correct_count]
else: input_details = interpreter.get_input_details() input_shape = input_details[0]['shape'] size = input_shape[1:] print(size) size = (size[0], size[1]) print(f"Model input size: {size}") output = interpreter.tensor(interpreter.get_output_details()[0]["index"]) zeros = np.zeros((size[0], size[1], 1)) print("Starting Inference") i = 0 for path, in0, img, vid_cap in tqdm(dl): in0 = np.moveaxis(in0[0], 0, -1) if tpu: common.set_input(interpreter, cv2.resize(in0, dsize=size)) else: in1 = cv2.resize(np.array(in0, dtype=np.float32), dsize=size) print(in1.shape, input_details[0]['shape']) interpreter.set_tensor(input_details[0]['index'], np.expand_dims(in1, axis=0)) interpreter.invoke() zeros[:, :] = 0 zeros[output()[0] > threshold] = 1 if single: zeros = np.expand_dims(boundary_fill(zeros[:, :, 0], np.array([200, 110]), boundary=0, fill=0), axis=-1) if snake:
def main():
    """Train an imprinting model from a dataset directory and evaluate it.

    Reads the command-line arguments via ``_parse_args``, imprints one class
    per category found by ``_read_data``, writes the trained model to
    ``args.output`` (plus its labels via ``_save_labels``), then prints top-1
    to top-5 accuracy over the test split.
    """
    args = _parse_args()
    engine = ImprintingEngine(args.model_path, keep_classes=args.keep_classes)
    extractor = make_interpreter(
        engine.serialize_extractor_model(), device=':0')
    extractor.allocate_tensors()
    shape = common.input_size(extractor)

    print('--------------- Parsing data set -----------------')
    print('Dataset path:', args.data)

    train_set, test_set = _read_data(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    # New classes are trained under ids that follow the classes the engine
    # already has, so the label map is keyed by those same ids. Keying it by
    # the bare enumeration index mismatches the model's output ids whenever
    # the engine starts with existing classes.
    num_classes = engine.num_classes
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(
            train_set.items(), start=num_classes):
        print('Processing category:', category)
        train_input.append(
            _prepare_images(image_list, os.path.join(args.data, category),
                            shape))
        labels_map[class_id] = category

    print('---------------- Start training -----------------')
    for class_id, tensors in enumerate(train_input, start=num_classes):
        for tensor in tensors:
            common.set_input(extractor, tensor)
            extractor.invoke()
            embedding = classify.get_scores(extractor)
            engine.train(embedding, class_id=class_id)
    print('---------------- Training finished! -----------------')

    with open(args.output, 'wb') as f:
        f.write(engine.serialize_model())
    print('Model saved as : ', args.output)
    _save_labels(labels_map, args.output)

    print('------------------ Start evaluating ------------------')
    interpreter = make_interpreter(args.output)
    interpreter.allocate_tensors()
    size = common.input_size(interpreter)

    top_k = 5
    correct = [0] * top_k
    wrong = [0] * top_k
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            # Close the source file as soon as the resized copy exists.
            image_path = os.path.join(args.data, category, img_name)
            with Image.open(image_path) as source:
                img = source.resize(size, Image.NEAREST)
            common.set_input(interpreter, img)
            interpreter.invoke()
            candidates = classify.get_classes(
                interpreter, top_k, score_threshold=0.1)

            # Rank of the first candidate carrying the right label; top_k
            # means "not found". .get() tolerates ids with no known label.
            first_hit = top_k
            for rank, candidate in enumerate(candidates[:top_k]):
                if labels_map.get(candidate.id) == category:
                    first_hit = rank
                    break

            # A hit at rank r counts as correct for every k > r.
            for i in range(top_k):
                if i >= first_hit:
                    correct[i] += 1
                else:
                    wrong[i] += 1

    print('---------------- Evaluation result -----------------')
    for i in range(top_k):
        total = correct[i] + wrong[i]
        if total == 0:
            # Empty test split: report it instead of dividing by zero.
            print('Top {} : n/a (no test images)'.format(i + 1))
            continue
        print('Top {} : {:.0%}'.format(i + 1, correct[i] / total))