import cv2 as cv
import time
from openvino.inference_engine import IECore

face_xml = "./intel/face-detection-0205/FP16-INT8/face-detection-0205.xml"
face_bin = "./intel/face-detection-0205/FP16-INT8/face-detection-0205.bin"

ie = IECore()
for device in ie.available_devices:
    print(device)

# Read IR
net = ie.read_network(model=face_xml, weights=face_bin)
input_blob = next(iter(net.input_info))
out_blob = next(iter(net.outputs))

# Input setup
n, c, h, w = net.input_info[input_blob].input_data.shape

# Bind the network to the device and create the inference object
exec_net = ie.load_network(network=net, device_name="CPU")

# cap = cv.VideoCapture("./people-detection.mp4")
cap = cv.VideoCapture(0)
while True:
    inf_start = time.time()
    ret, src = cap.read()
    if not ret:
        break
def test_batch_size_getter():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    assert net.batch_size == 1
def test_name():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    assert net.name == "test_model"
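# The two tests above rely on module-level fixture paths `test_net_xml` and
# `test_net_bin`. A minimal sketch of that setup; the paths are illustrative,
# not from the original source:
test_net_xml = "test_data/test_model.xml"
test_net_bin = "test_data/test_model.bin"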
import os
import sys
import logging as log
from time import time

import cv2
import numpy as np
from openvino.inference_engine import IECore, IENetwork


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    # plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    ie = IECore()
    # print(dir(IEPlugin))
    if args.cpu_extension and 'CPU' in args.device:
        # plugin.add_cpu_extension(args.cpu_extension)
        ie.add_extension(args.cpu_extension, args.device)
    else:
        ie.set_config({"PERF_COUNT": "YES"}, "GPU")

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if args.device == "CPU":
        supported_layers = ie.query_network(net, args.device)
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = len(args.input)

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    print("size is", n)
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, num_requests=2, device_name=args.device)
    del net

    # Start sync inference
    log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = []
    for i in range(args.number_iter):
        t0 = time()
        res = exec_net.infer(inputs={input_blob: images})
        infer_time.append((time() - t0) * 1000)

    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]
    log.info("Top {} results: ".format(args.number_top))
    args.labels = "models/squeezenet/FP32/squeezenet1.1.labels"
    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.split(sep=' ', maxsplit=1)[-1].strip() for x in f]
    else:
        labels_map = None
    for i, probs in enumerate(res):
        probs = np.squeeze(probs)
        top_ind = np.argsort(probs)[-args.number_top:][::-1]
        print("Image {}\n".format(args.input[i]))
        for id in top_ind:
            det_label = labels_map[id] if labels_map else "#{}".format(id)
            print("{:<5}{:.7f} label {}".format(id, probs[id], det_label))
        print("\n")

    total_inference = np.sum(np.asarray(infer_time))
    log.info("Average running time of one iteration: {} ms".format(np.average(np.asarray(infer_time))))
    log.info("total running time of inference: {} ms".format(total_inference))
    log.info("Throughput: {} FPS".format((1000 * args.number_iter * n) / total_inference))
    print("\n")

    # Printing performance counts
    exec_net.requests[0].infer({input_blob: images[0]})
    if args.perf_counts:
        perf_counts = exec_net.requests[0].get_perf_counts()
        print("performance counts:\n")
        total = 0
        for layer, stats in perf_counts.items():
            total += stats['real_time']
            print("{:<40} {:<15} {:<10} {:<15} {:<8} {:<5} {:<5} {:<5} {:<10} {:<15}"
                  .format(layer, stats['status'], 'layerType:', stats['layer_type'],
                          'realTime:', stats['real_time'], 'cpu:', stats['cpu_time'],
                          'execType:', stats['exec_type']))
        print("{:<20} {:<7} {:<20}".format('TotalTime:', total, 'microseconds'))

    log.info("Execution successful")
    del exec_net
    del ie
import os
import time
import logging

import cv2
from openvino.inference_engine import IECore, IENetwork


class Model_FacialLandmarkDetection:
    '''
    Class for the Facial Landmark Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables. DONE
        '''
        self.plugin = None
        self.network = None
        self.device = device
        self.extensions = extensions
        self.output_path = "../outputs/"
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception:
            raise ValueError("Could not initialise the network. Have you entered the correct model path?")
        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them. DONE
        '''
        self.plugin = IECore()
        if self.check_model() != True:
            logging.info("Checking whether extensions are available to add to IECore...")
            if (self.extensions != None) and ("CPU" in self.device):
                self.plugin.add_extension(self.extensions, self.device)
                logging.info("Extension added.")
            else:
                logging.error("No extensions available. Exiting with error.")
                exit(1)
        t_0 = time.time()
        self.network = self.plugin.load_network(network=self.model, device_name=self.device)
        t_1 = time.time()
        with open(os.path.join(self.output_path, 'facial_landmarks.txt'), 'w') as f:
            f.write("model_load_time: ")
            f.write(str(t_1 - t_0) + '\n')

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image. DONE
        '''
        # preprocess and prepare input
        p_image = self.preprocess_input(image)
        input_dict = {self.input_name: p_image}
        # run inference
        t_0 = time.time()
        result = self.network.infer(input_dict)
        t_1 = time.time()
        with open(os.path.join(self.output_path, 'facial_landmarks.txt'), 'a') as f:
            # f.write("inference_time: ")
            f.write(str(t_1 - t_0) + '\n')
        # extract the useful tensor
        outputs = result[self.output_name]
        # get the eye boxes
        eyes, eye_coords = self.preprocess_output(outputs, image)
        return eyes, eye_coords

    def check_model(self):
        # check for unsupported layers
        supported_layers = self.plugin.query_network(network=self.model, device_name=self.device)
        unsupported_layers = [l for l in self.model.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0:
            logging.info("Model_FacialLandmarkDetection - Unsupported layers found: {}".format(unsupported_layers))
            return False
        return True

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        input_model_width = self.input_shape[3]
        input_model_height = self.input_shape[2]
        p_image = cv2.resize(image, (input_model_width, input_model_height))
        p_image = p_image.transpose((2, 0, 1))
        p_image = p_image.reshape(1, *p_image.shape)
        return p_image

    def preprocess_output(self, outputs, image):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        # get width and height of the original image
        original_width = image.shape[1]
        original_height = image.shape[0]
        # shape of outputs is (1, 10, 1, 1); reshape it
        outputs = outputs.reshape(-1)
        eye_boxes = []
        eye_coords = []
        pix = 15
        # get the left eye
        x_l = int(outputs[0] * original_width)
        y_l = int(outputs[1] * original_height)
        # get the right eye
        x_r = int(outputs[2] * original_width)
        y_r = int(outputs[3] * original_height)
        # left eye box; crop with image[y_range, x_range]
        left_eye = image[(y_l - pix):(y_l + pix), (x_l - pix):(x_l + pix)]
        eye_boxes.append(left_eye)
        eye_coords.append([x_l, y_l])
        # cv2.rectangle(image, (x_l - pix, y_l - pix), (x_l + pix, y_l + pix), (0, 55, 255), 1)
        # right eye box
        right_eye = image[(y_r - pix):(y_r + pix), (x_r - pix):(x_r + pix)]
        eye_boxes.append(right_eye)
        eye_coords.append([x_r, y_r])
        # cv2.rectangle(image, (x_r - pix, y_r - pix), (x_r + pix, y_r + pix), (0, 55, 255), 1)
        return eye_boxes, eye_coords
def __init__(self, backend_name: str) -> None:
    self.backend_name = backend_name
    log.debug("Creating Inference Engine for %s" % backend_name)
    self.backend = IECore()
    assert backend_name in self.backend.available_devices, (
        'The requested device "' + backend_name + '" is not supported!')
import sys
import time
import logging

import numpy as np
from openvino.inference_engine import IECore


def main():
    args = build_argparser().parse_args()
    logging.basicConfig(format="[ %(levelname)s ] %(message)s", level=logging.INFO, stream=sys.stdout)
    log = logging.getLogger()
    log.info("Creating Inference Engine")
    ie = IECore()
    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info("Loading model {}".format(args.model))
    net = ie.read_network(args.model, args.model[:-4] + ".bin")
    if len(net.input_info) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_blob = next(iter(net.input_info))
    input_shape = net.input_info[input_blob].input_data.shape
    if len(net.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)
    output_blob = next(iter(net.outputs))

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    log.info("Preparing input")
    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [l.rstrip() for l in file.readlines()]

    batch_size, channels, one, length = input_shape
    if one != 1:
        raise RuntimeError("Wrong third dimension size of model input shape - {} (expected 1)".format(one))

    audio = AudioSource(args.input, channels=channels, samplerate=args.sample_rate)

    hop = length - args.overlap if isinstance(args.overlap, int) else int(length * (1.0 - args.overlap))
    if hop < 0:
        log.error("Wrong value for '-ol/--overlap' argument - overlapping more than clip length")
        sys.exit(1)

    log.info("Starting inference")
    outputs = []
    clips = 0
    infer_time = 0
    for idx, chunk in enumerate(audio.chunks(length, hop, num_chunks=batch_size)):
        chunk.shape = input_shape
        infer_start_time = time.perf_counter()
        output = exec_net.infer(inputs={input_blob: chunk})
        infer_time += time.perf_counter() - infer_start_time
        clips += batch_size
        output = output[output_blob]
        for batch, data in enumerate(output):
            start_time = (idx * batch_size + batch) * hop / audio.samplerate
            end_time = ((idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    start_time, end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    logging.info("Average infer time - {:.1f} ms per clip".format(infer_time / clips * 1000))
import os
import time
import logging

import cv2
import numpy as np
from openvino.inference_engine import IECore, IENetwork


class FaceDetection:
    """
    Class for the Face Detection Model.
    """

    def __init__(self, model_name, device='CPU', extensions=None):
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.out_blob = None
        self.exec_net = None
        self.model_name = model_name
        self.extensions = extensions
        self.device = device

    def load_model(self):
        """
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        """
        # Fetch XML model
        model_xml = self.model_name
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        self.plugin = IECore()
        self.net = IENetwork(model=model_xml, weights=model_bin)

        # Add CPU extension to self.plugin and check not supported layers
        if "CPU" in self.device:
            supported_layers = self.plugin.query_network(self.net, self.device)
            not_supported_layers = [layer for layer in self.net.layers.keys() if layer not in supported_layers]
            if len(not_supported_layers) != 0 and self.device == 'CPU':
                logging.error(f"Unsupported layers: {not_supported_layers}")
                print(f"Not supported layers: {not_supported_layers}")

        # Load model in network
        start_time = time.time()
        self.exec_net = self.plugin.load_network(network=self.net, device_name=self.device, num_requests=1)
        end_time = time.time()

        # Obtain blob info from network
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))

        print(f"Face Detection Model Loading Time: {end_time - start_time}")
        logging.info(f"Face Detection Model Loading Time: {end_time - start_time}")

    def predict(self, image, visualize):
        """
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        """
        # org_img = image.copy
        preprocessed_image = self.preprocess_input(image)

        # infer image
        outputs = self.exec_net.infer({self.input_blob: preprocessed_image})
        coords = self.preprocess_output(outputs)
        if len(coords) == 0:
            logging.warning("No face found in video or image")
            return 0, 0
        coords = coords[0]  # take the first detected face
        height = image.shape[0]
        width = image.shape[1]
        coords = coords * np.array([width, height, width, height])
        coords = coords.astype(np.int32)
        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        cv2.rectangle(image, (coords[0], coords[1]), (coords[2], coords[3]), (255, 12, 12), 2)
        if visualize:
            # Save Image
            # cv2.imwrite('../output/face_detection1.jpg', cropped_face)
            # cv2.rectangle(image, (coords[0], coords[1]), (coords[2], coords[3]), (255, 12, 12), 2)
            cv2.imshow("Face detected", image)
            cv2.waitKey(0)
        else:
            logging.info("Visualization is off so image is not visible")
        return cropped_face, coords

    def preprocess_input(self, image):
        """
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        """
        preprocessed_image = cv2.resize(image, (672, 384))
        preprocessed_image = preprocessed_image.transpose((2, 0, 1))
        preprocessed_image = preprocessed_image.reshape(1, 3, 384, 672)
        return preprocessed_image

    def preprocess_output(self, outputs):
        """
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        """
        coords = []
        outs = outputs[self.out_blob][0][0]
        logging.info(f"Total {len(outs)} faces found")
        for out in outs:
            confidence = out[2]
            if confidence > 0.5:  # args.threshold
                x_min = out[3]
                y_min = out[4]
                x_max = out[5]
                y_max = out[6]
                coords.append([x_min, y_min, x_max, y_max])
        logging.info(f"Face coordinate: {coords}")
        return coords
import os
import sys
import logging as log

import cv2
import numpy as np
from openvino.inference_engine import IECore, IENetwork


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    log.info("Creating Inference Engine")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = len(args.input)

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # Start sync inference
    log.info("Starting inference in synchronous mode")
    res = exec_net.infer(inputs={input_blob: images})

    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]
    for batch, data in enumerate(res):
        data = np.swapaxes(data, 0, 2)
        data = np.swapaxes(data, 0, 1)
        data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
        data[data < 0] = 0
        data[data > 255] = 255
        # data = data[::] - (args.mean_val_r, args.mean_val_g, args.mean_val_b)
        # out_img = os.path.join(os.path.dirname(__file__), "out_{}.bmp".format(batch))
        res_name = os.path.basename(args.input[batch]).split(".")[0] + "_output.jpg"
        output_dir = "/home/zhu/PycharmProjects/denoise_cnn/openvion/model_IR/20200924"
        out_img = os.path.join(output_dir, res_name)
        cv2.imwrite(out_img, data)
        log.info("Result image was saved to {}".format(out_img))
import os

from openvino.inference_engine import IECore, IENetwork


class Network:
    '''
    Load and store information for working with the Inference Engine,
    and any loaded models.
    '''

    def __init__(self):
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_net = None
        self.infer_request = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        '''
        Load the model given IR files.
        Defaults to CPU as device for use in the workspace.
        Synchronous requests made within.
        '''
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Initialize the plugin
        self.plugin = IECore()

        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)

        # Read the IR as an IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)

        # Load the IENetwork into the plugin
        self.exec_net = self.plugin.load_network(self.network, device)

        # Get the input and output layers
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def get_input_shape(self):
        '''
        Gets the input shape of the network
        '''
        return self.network.inputs[self.input_blob].shape

    def async_inference(self, image):
        '''
        Makes an asynchronous inference request, given an input image.
        '''
        self.exec_net.start_async(request_id=0, inputs={self.input_blob: image})
        return

    def wait(self):
        '''
        Checks the status of the inference request.
        '''
        status = self.exec_net.requests[0].wait(-1)
        return status

    def extract_output(self):
        '''
        Returns a list of the results for the output layer of the network.
        '''
        return self.exec_net.requests[0].outputs[self.output_blob]
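# Minimal usage sketch for the Network wrapper above: start one async request
# and read its output once the request completes. The model path, input image,
# and preprocessing are assumptions for illustration, not part of the original.
import cv2

net = Network()
net.load_model("./model.xml", device="CPU")
n, c, h, w = net.get_input_shape()
frame = cv2.imread("input.jpg")
blob = cv2.resize(frame, (w, h)).transpose((2, 0, 1)).reshape(n, c, h, w)
net.async_inference(blob)
if net.wait() == 0:  # 0 means the request completed successfully
    result = net.extract_output()
    print("Output shape:", result.shape)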
import cv2
import numpy as np
from openvino.inference_engine import IECore


class AgeGenderRecognitionModel:
    def __init__(self, model_name, device='CPU', extensions=None, num_requests=1):
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split('.')[0] + '.bin'
        self.num_requests = num_requests
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.is_sync = None

    def load_model(self):
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found: {}".format(unsupported_layers))
            if not self.extensions == None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("After adding the extension, unsupported layers were still found")
                    exit(1)
                print("After adding the extension the issue is resolved")
            else:
                print("Give the path of cpu extension")
                exit(1)
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device,
                                                 num_requests=self.num_requests)
        if self.num_requests == 1:
            self.is_sync = True
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = [i for i in self.network.outputs]

    def predict(self, image, cur_req_id=None, next_req_id=None):
        img_processed = self.preprocess_input(image.copy())
        if self.is_sync:
            outputs = self.exec_net.infer({self.input_name: img_processed})
            age, gender = self.preprocess_output(outputs)
            return age, gender, True
        self.exec_net.start_async(request_id=next_req_id, inputs={self.input_name: img_processed})
        if self.exec_net.requests[cur_req_id].wait() == 0:
            outputs = self.exec_net.requests[cur_req_id].outputs
            age, gender = self.preprocess_output(outputs)
            return age, gender, True
        return None, None, False

    def check_model(self):
        ''

    def preprocess_input(self, image):
        image_resized = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(image_resized, axis=0), (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):
        age = outputs[self.output_names[0]][0][0][0][0] * 100
        gender = np.argmax(outputs[self.output_names[1]])  # 0: female, 1: male
        return age, gender
import os

from openvino.inference_engine import IECore, IENetwork


class Network:
    """
    Load and configure inference plugins for the specified target devices,
    and perform synchronous and asynchronous modes for the specified infer requests.
    """

    def __init__(self):
        ### TODO: Initialize any class variables desired ###
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, num_req, device="CPU", cpu_extension=None):
        ### TODO: Load the model ###
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        ### TODO: Check for supported layers ###
        ### TODO: Add any necessary extensions ###
        ### TODO: Return the loaded inference plugin ###
        self.plugin = IECore()
        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)
        # Read the IR as an IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)
        # Load the IENetwork into the plugin
        if num_req == 0:
            # Loads network read from IR to the plugin
            self.exec_network = self.plugin.load_network(self.network, device)
        else:
            self.exec_network = self.plugin.load_network(self.network, device, num_requests=num_req)
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return self.plugin
        ### Note: You may need to update the function parameters. ###

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###
        return self.network.inputs[self.input_blob].shape

    def exec_net(self, image):
        ### TODO: Start an asynchronous request ###
        self.exec_network.start_async(request_id=0, inputs={self.input_blob: image})
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        return self.plugin

    def wait(self, request_id):
        ### TODO: Wait for the request to be complete. ###
        status = self.exec_network.requests[request_id].wait(-1)
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        return status

    def get_output(self, request_id, output=None):
        ### TODO: Extract and return the output results ###
        ### Note: You may need to update the function parameters. ###
        if output:
            res = self.exec_network.requests[request_id].outputs[output]
        else:
            res = self.exec_network.requests[request_id].outputs[self.output_blob]
        return res
import math

import cv2
import numpy as np
from openvino.inference_engine import IECore


class GazeEstimationModel:
    '''
    Class for the Gaze Estimation Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split(".")[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found: {}".format(unsupported_layers))
            if not self.extensions == None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("After adding the extension, unsupported layers were still found")
                    exit(1)
                print("After adding the extension the issue is resolved")
            else:
                print("Give the path of cpu extension")
                exit(1)
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)

        self.input_name = [i for i in self.network.inputs.keys()]
        self.input_shape = self.network.inputs[self.input_name[1]].shape
        self.output_names = [i for i in self.network.outputs.keys()]

    def predict(self, left_eye_image, right_eye_image, hpa):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        le_img_processed, re_img_processed = self.preprocess_input(left_eye_image.copy(), right_eye_image.copy())
        outputs = self.exec_net.infer({'head_pose_angles': hpa,
                                       'left_eye_image': le_img_processed,
                                       'right_eye_image': re_img_processed})
        new_mouse_coord, gaze_vector = self.preprocess_output(outputs, hpa)
        return new_mouse_coord, gaze_vector

    def check_model(self):
        ''

    def preprocess_input(self, left_eye, right_eye):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        le_image_resized = cv2.resize(left_eye, (self.input_shape[3], self.input_shape[2]))
        re_image_resized = cv2.resize(right_eye, (self.input_shape[3], self.input_shape[2]))
        le_img_processed = np.transpose(np.expand_dims(le_image_resized, axis=0), (0, 3, 1, 2))
        re_img_processed = np.transpose(np.expand_dims(re_image_resized, axis=0), (0, 3, 1, 2))
        return le_img_processed, re_img_processed

    def preprocess_output(self, outputs, hpa):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        gaze_vector = outputs[self.output_names[0]].tolist()[0]
        # gaze_vector = gaze_vector / cv2.norm(gaze_vector)
        rollValue = hpa[2]  # angle_r_fc output from the HeadPoseEstimation model
        cosValue = math.cos(rollValue * math.pi / 180.0)
        sinValue = math.sin(rollValue * math.pi / 180.0)
        newx = gaze_vector[0] * cosValue + gaze_vector[1] * sinValue
        newy = -gaze_vector[0] * sinValue + gaze_vector[1] * cosValue
        return (newx, newy), gaze_vector
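# Quick sanity check of the roll compensation in preprocess_output above,
# with made-up numbers: the 2-D rotation by the head roll angle maps a
# gaze vector of (1, 0) under a 90-degree roll to approximately (0, -1).
import math

gaze = [1.0, 0.0]
roll = 90.0
c, s = math.cos(math.radians(roll)), math.sin(math.radians(roll))
newx = gaze[0] * c + gaze[1] * s    # ~0.0
newy = -gaze[0] * s + gaze[1] * c   # ~-1.0
print(newx, newy)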
class OpenVINO_Core:
    def __init__(self):
        self.ie = IECore()
        self.name = ""
        self.asyncInference = True
        self.plugin = None
        self.ieNet = None
        self.result_processor = None
        self.exec_net = None
        devices = []
        for device in self.ie.available_devices:
            if 'MYRIAD' in device:
                if not 'MYRIAD' in devices:
                    devices.append('MYRIAD')
            else:
                if not device in devices:
                    devices.append(device)
        self.devices = devices
        self.outputFormat = Output_Format.Unknown
        self.inputFormat = Input_Format.Unknown
        # self.outputName = None
        self._debug = True
        self.ver_major = 0
        self.ver_minor = 0
        self.ver_build = 0
        self.current_hw = None
        self.current_precision = None
        self.current_model = None
        self.request_slot_curr = 1
        self.request_slot_next = 0

    def reset_engine(self):
        self.name = ""
        self.plugin = None
        self.ieNet = None
        self.result_processor = None
        self.exec_net = None
        self.outputFormat = Output_Format.Unknown
        self.inputFormat = Input_Format.Unknown
        # self.outputName = None
        self.ver_major = 0
        self.ver_minor = 0
        self.ver_build = 0
        self.classLabels = {}
        self.current_hw = None
        self.current_precision = None
        self.current_model = None

    def dump(self, obj):
        print('=================================================')
        for attr in dir(obj):
            print('obj.%s = %r' % (attr, getattr(obj, attr)))
        print('=================================================')

    def get_signature(self):
        if len(self.ie.available_devices) > 0:
            device = self.ie.available_devices[0]
            version = self.ie.get_versions(device)
            if os.getenv('OPENVINO_OBJECT_DETECTION_PYTHON'):
                signature = 'OpenVINO {}.{}.{} in Container'.format(
                    version[device].major, version[device].minor, version[device].build_number)
            else:
                signature = 'OpenVINO {}.{}.{}'.format(
                    version[device].major, version[device].minor, version[device].build_number)
        else:
            signature = 'OpenVINO No Hardware Found'
        return signature

    def load_model(self, xml_file, bin_file, device="MYRIAD", cpu_extension=None, precision='FP16'):
        # N : # of images in batch
        # C : Channel
        # H : Height
        # W : Width
        # Input => HWC
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            self.reset_engine()
            p_model = Path(xml_file).resolve()
            self.name = str(Path(p_model.name).stem)
            logging.info('==================================================================')
            logging.info('Loading Model')
            logging.info('  Name      : {}'.format(self.name))
            logging.info('  Target    : {}'.format(device))
            logging.info('  Model     : {}'.format(xml_file))
            logging.info('  Precision : {}'.format(precision))

            version_data = self.ie.get_versions(device)
            self.ver_major = int(version_data[device].major)
            self.ver_minor = int(version_data[device].minor)
            self.ver_build = int(version_data[device].build_number)

            # self.plugin = IEPlugin(device=device)
            # if 'MYRIAD' in device:
            #     # https://docs.openvinotoolkit.org/latest/_docs_IE_DG_supported_plugins_MYRIAD.html
            #     self.plugin.set_config({"VPU_FORCE_RESET": "NO"})

            if self.ie:
                del self.ie
            self.ie = IECore()

            if self.ver_major >= 2 and self.ver_minor >= 1 and self.ver_build >= 42025:
                self.ieNet = self.ie.read_network(model=xml_file, weights=bin_file)
            else:
                self.ieNet = IENetwork(model=xml_file, weights=bin_file)

            # process input
            # image_tensor : TensorFlow
            # data         : Caffe
            if len(self.ieNet.inputs) > 2:
                logging.warning('!! Too many inputs. Not supported')
                return Model_Flag.LoadError

            # don't touch layers. Somehow touching a layer will cause load failure with Myriad
            # logging.info('  -Layers')
            # logging.info('     Type : {}'.format(self.ieNet.layers[key].type))
            # self.dump(self.ieNet.layers[key])

            logging.info('==================================================================')
            logging.info('Output Blobs')
            for key, blob in self.ieNet.outputs.items():
                logging.info('Output Key   : {}'.format(key))
                logging.info('   Layout    : {}'.format(blob.layout))
                logging.info('   Shape     : {}'.format(blob.shape))
                logging.info('   Precision : {}'.format(blob.precision))
                # logging.info('  -Layers')
                # logging.info('     Type : {}'.format(self.ieNet.layers[key].type))
                # self.dump(self.ieNet.layers[key])
                # blob.precision = precision

            logging.info('==================================================================')
            logging.info('Input Blobs')
            for key, blob in self.ieNet.inputs.items():
                logging.info('Input Key    : {}'.format(key))
                logging.info('   Layout    : {}'.format(blob.layout))
                logging.info('   Shape     : {}'.format(blob.shape))
                logging.info('   Precision : {}'.format(blob.precision))
                # blob.precision = precision

            logging.info('>> Loading model to {}'.format(device))
            self.exec_net = self.ie.load_network(network=self.ieNet, device_name=device, num_requests=2)
            logging.info('<< Model loaded to {}'.format(device))

            # touch layers only after we load
            # self.output_blob_key = next(iter(self.ieNet.outputs))
            for key, blob in self.ieNet.outputs.items():
                layer = self.ieNet.layers[key]
                if layer.type == 'DetectionOutput':
                    outputFormat = Output_Format.DetectionOutput
                elif layer.type == 'RegionYolo':
                    outputFormat = Output_Format.RegionYolo
                elif layer.type == 'Convolution':
                    if layer.name == 'Mconv7_stage2_L1' or layer.name == 'Mconv7_stage2_L2':
                        outputFormat = Output_Format.HumanPose
                else:
                    return Model_Flag.Unsupported

            if outputFormat == Output_Format.DetectionOutput:
                if len(self.ieNet.inputs) == 1 and len(self.ieNet.outputs) == 1:
                    # 1 input, 1 output
                    input_key = next(iter(self.ieNet.inputs))
                    output_key = next(iter(self.ieNet.outputs))
                    layer = self.ieNet.layers[output_key]
                    if layer.type == 'DetectionOutput':
                        outputFormat = Output_Format.DetectionOutput
                    else:
                        return Model_Flag.Unsupported

                    if input_key == 'image_tensor':
                        self.inputFormat = Input_Format.Tensorflow
                    elif input_key == 'image':
                        self.inputFormat = Input_Format.IntelIR
                    elif input_key == 'data':
                        self.inputFormat = Input_Format.Caffe
                    else:
                        self.inputFormat = Input_Format.Other

                    params = self.ieNet.layers[output_key].params
                    input_blob = self.ieNet.inputs[input_key]
                    self.result_processor = Object_Detection_Processor(
                        model_name=self.name,
                        input_format=self.inputFormat,
                        input_key=input_key,
                        input_shape=input_blob.shape,
                        input_layout=input_blob.layout,
                        output_format=outputFormat,
                        output_key=output_key,
                        output_params=params)

                elif len(self.ieNet.inputs) == 2 and len(self.ieNet.outputs) == 1:
                    # 2 inputs and 1 output. Faster RCNN
                    output_key = next(iter(self.ieNet.outputs))
                    layer = self.ieNet.layers[output_key]
                    if layer.type != 'DetectionOutput':
                        return Model_Flag.Unsupported

                    info_key = ""
                    data_key = ""
                    for key, blob in self.ieNet.inputs.items():
                        if key == 'image_info':
                            info_key = key
                        elif key == 'image_tensor':
                            data_key = key

                    if len(info_key) > 0 and len(data_key) > 0:
                        self.inputFormat = Input_Format.Faster_RCNN
                        input_blob = self.ieNet.inputs[data_key]
                        params = self.ieNet.layers[output_key].params
                        self.result_processor = Object_Detection_RCNN_Processor(
                            model_name=self.name,
                            input_format=self.inputFormat,
                            info_key=info_key,
                            data_key=data_key,
                            data_shape=input_blob.shape,
                            data_layout=input_blob.layout,
                            output_format=Output_Format.DetectionOutput,
                            output_key=output_key,
                            output_params=params)
                    else:
                        return Model_Flag.Unsupported

            elif outputFormat == Output_Format.RegionYolo:
                input_key = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.Yolo
                self.result_processor = Object_Detection_Yolo_Processor(
                    model_name=self.name,
                    input_format=self.inputFormat,
                    input_key=input_key,
                    input_shape=input_blob.shape,
                    input_layout=input_blob.layout,
                    output_format=Output_Format.RegionYolo)

                for key, blob in self.ieNet.outputs.items():
                    self.result_processor.reshape_data[key] = self.ieNet.layers[self.ieNet.layers[key].parents[0]].shape
                    self.result_processor.set_class_label(self.ieNet.layers[key].params)
                # for key, blob in self.result_processor.reshape_data.items():
                #     print('{} {}'.format(key, blob))

            elif outputFormat == Output_Format.HumanPose:
                input_key = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.HumanPose
                self.result_processor = Human_Pose_Processor(
                    model_name=self.name,
                    input_format=Input_Format.HumanPose,
                    input_shape=input_blob.shape,
                    input_layout=input_blob.layout)

            return Model_Flag.Loaded

        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(
                self.__class__.__name__, sys._getframe().f_code.co_name, ex))
            return Model_Flag.LoadError

    def run_inference(self, frame, confidence):
        # if self._debug:
        #     logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
        return_frame = frame

        if self.result_processor == None:
            return return_frame

        if self.inputFormat == Input_Format.Faster_RCNN:
            inference_data = self.result_processor.process_for_inference(frame=frame)
            if inference_data:
                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(
                        request_id=self.request_slot_next,
                        inputs={inference_data.data_key: inference_data.image_data,
                                inference_data.info_key: inference_data.image_info})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(
                            self.exec_net.requests[self.request_slot_curr].outputs,
                            self.result_processor.prev_frame,
                            confidence)
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={inference_data.data_key: inference_data.image_data,
                                                inference_data.info_key: inference_data.image_info})
                    return_frame = self.result_processor.process_result(
                        self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        elif self.inputFormat == Input_Format.Yolo:
            frame_data, input_key = self.result_processor.process_for_inference(frame=frame)
            if frame_data.size > 0:
                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.result_processor.prev_frame_data = frame_data
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key: frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(
                            layers=self.ieNet.layers,
                            results=self.exec_net.requests[self.request_slot_curr].outputs,
                            frame_data=self.result_processor.prev_frame_data,
                            frame=self.result_processor.prev_frame,
                            confidence=confidence)
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key: frame_data})
                    return_frame = self.result_processor.process_result(
                        layers=self.ieNet.layers,
                        results=self.exec_net.requests[self.request_slot_curr].outputs,
                        frame_data=frame_data,
                        frame=frame,
                        confidence=confidence)

        elif self.inputFormat == Input_Format.HumanPose:
            frame_data, input_key = self.result_processor.process_for_inference(frame=frame)
            if frame_data.size > 0:
                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key: frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(
                            self.exec_net.requests[self.request_slot_curr].outputs,
                            self.result_processor.prev_frame,
                            confidence)
                        assert return_frame.size > 0, "Frame Empty"
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key: frame_data})
                    return_frame = self.result_processor.process_result(
                        self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        elif self.inputFormat == Input_Format.Unknown:
            pass

        else:
            # elif self.inputFormat == Input_Format.Tensorflow or Input_Format.Caffe or Input_Format.IntelIR:
            # SSD/MobileNet Tensorflow models
            frame_data, input_key = self.result_processor.process_for_inference(frame=frame)
            if frame_data.size > 0:
                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key: frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(
                            self.exec_net.requests[self.request_slot_curr].outputs,
                            self.result_processor.prev_frame,
                            confidence)
                        assert return_frame.size > 0, "Frame Empty"
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key: frame_data})
                    return_frame = self.result_processor.process_result(
                        self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        if self.asyncInference:
            self.request_slot_next, self.request_slot_curr = self.request_slot_curr, self.request_slot_next

        return return_frame
class BlazeposeOpenvino: def __init__(self, input_src=None, pd_xml=POSE_DETECTION_MODEL, pd_device="CPU", pd_score_thresh=0.5, pd_nms_thresh=0.3, lm_xml=FULL_BODY_LANDMARK_MODEL, lm_device="CPU", lm_score_threshold=0.7, full_body=True, use_gesture=False, smoothing=True, filter_window_size=5, filter_velocity_scale=10, show_3d=False, crop=False, multi_detection=False, force_detection=False, output=None): self.pd_score_thresh = pd_score_thresh self.pd_nms_thresh = pd_nms_thresh self.lm_score_threshold = lm_score_threshold self.full_body = full_body self.use_gesture = use_gesture self.smoothing = smoothing self.show_3d = show_3d self.crop = crop self.multi_detection = multi_detection self.force_detection = force_detection if self.multi_detection: print( "Warning: with multi-detection, smoothing filter is disabled and pose detection is forced on every frame." ) self.smoothing = False self.force_detection = True if input_src.endswith('.jpg') or input_src.endswith('.png'): self.input_type = "image" self.img = cv2.imread(input_src) self.video_fps = 25 video_height, video_width = self.img.shape[:2] else: self.input_type = "video" if input_src.isdigit(): input_type = "webcam" input_src = int(input_src) self.cap = cv2.VideoCapture(input_src) self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS)) video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print("Video FPS:", self.video_fps) # The full body landmark model predict 39 landmarks. # We are interested in the first 35 landmarks # from 1 to 33 correspond to the well documented body parts, # 34th (mid hips) and 35th (a point above the head) are used to predict ROI of next frame # Same for upper body model but with 8 less landmarks self.nb_lms = 35 if self.full_body else 27 if self.smoothing: self.filter = mpu.LandmarksSmoothingFilter(filter_window_size, filter_velocity_scale, (self.nb_lms - 2, 3)) # Create SSD anchors # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt anchor_options = mpu.SSDAnchorOptions( num_layers=4, min_scale=0.1484375, max_scale=0.75, input_size_height=128, input_size_width=128, anchor_offset_x=0.5, anchor_offset_y=0.5, strides=[8, 16, 16, 16], aspect_ratios=[1.0], reduce_boxes_in_lowest_layer=False, interpolated_scale_aspect_ratio=1.0, fixed_anchor_size=True) self.anchors = mpu.generate_anchors(anchor_options) self.nb_anchors = self.anchors.shape[0] print(f"{self.nb_anchors} anchors have been created") # Load Openvino models self.load_models(pd_xml, pd_device, lm_xml, lm_device) # Rendering flags self.show_pd_box = False self.show_pd_kps = False self.show_rot_rect = False self.show_landmarks = True self.show_scores = False self.show_gesture = self.use_gesture self.show_fps = True self.show_segmentation = False if self.show_3d: self.vis3d = o3d.visualization.Visualizer() self.vis3d.create_window() opt = self.vis3d.get_render_option() opt.background_color = np.asarray([0, 0, 0]) z = min(video_height, video_width) / 3 self.grid_floor = create_grid([0, video_height, -z], [video_width, video_height, -z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.grid_wall = create_grid([0, 0, z], [video_width, 0, z], [video_width, video_height, z], [0, video_height, z], 5, 2, color=(1, 1, 1)) self.vis3d.add_geometry(self.grid_floor) self.vis3d.add_geometry(self.grid_wall) view_control = self.vis3d.get_view_control() view_control.set_up(np.array([0, -1, 0])) view_control.set_front(np.array([0, 
0, -1])) if output is None: self.output = None else: fourcc = cv2.VideoWriter_fourcc(*"MJPG") self.output = cv2.VideoWriter(output, fourcc, self.video_fps, (video_width, video_height)) def load_models(self, pd_xml, pd_device, lm_xml, lm_device): print("Loading Inference Engine") self.ie = IECore() print("Device info:") versions = self.ie.get_versions(pd_device) print("{}{}".format(" " * 8, pd_device)) print("{}MKLDNNPlugin version ......... {}.{}".format( " " * 8, versions[pd_device].major, versions[pd_device].minor)) print("{}Build ........... {}".format( " " * 8, versions[pd_device].build_number)) # Pose detection model pd_name = os.path.splitext(pd_xml)[0] pd_bin = pd_name + '.bin' print( "Pose Detection model - Reading network files:\n\t{}\n\t{}".format( pd_xml, pd_bin)) self.pd_net = self.ie.read_network(model=pd_xml, weights=pd_bin) # Input blob: input - shape: [1, 3, 128, 128] # Output blob: classificators - shape: [1, 896, 1] : scores # Output blob: regressors - shape: [1, 896, 12] : bboxes self.pd_input_blob = next(iter(self.pd_net.input_info)) print( f"Input blob: {self.pd_input_blob} - shape: {self.pd_net.input_info[self.pd_input_blob].input_data.shape}" ) _, _, self.pd_h, self.pd_w = self.pd_net.input_info[ self.pd_input_blob].input_data.shape for o in self.pd_net.outputs.keys(): print(f"Output blob: {o} - shape: {self.pd_net.outputs[o].shape}") self.pd_scores = "classificators" self.pd_bboxes = "regressors" print("Loading pose detection model into the plugin") self.pd_exec_net = self.ie.load_network(network=self.pd_net, num_requests=1, device_name=pd_device) self.pd_infer_time_cumul = 0 self.pd_infer_nb = 0 self.infer_nb = 0 self.infer_time_cumul = 0 # Landmarks model if lm_device != pd_device: print("Device info:") versions = self.ie.get_versions(pd_device) print("{}{}".format(" " * 8, pd_device)) print("{}MKLDNNPlugin version ......... {}.{}".format( " " * 8, versions[pd_device].major, versions[pd_device].minor)) print("{}Build ........... 
{}".format( " " * 8, versions[pd_device].build_number)) lm_name = os.path.splitext(lm_xml)[0] lm_bin = lm_name + '.bin' print("Landmark model - Reading network files:\n\t{}\n\t{}".format( lm_xml, lm_bin)) self.lm_net = self.ie.read_network(model=lm_xml, weights=lm_bin) # Input blob: input_1 - shape: [1, 3, 256, 256] # Output blob: ld_3d - shape: [1, 195] for full body or [1, 155] for upper body # Output blob: output_poseflag - shape: [1, 1] # Output blob: output_segmentation - shape: [1, 1, 128, 128] self.lm_input_blob = next(iter(self.lm_net.input_info)) print( f"Input blob: {self.lm_input_blob} - shape: {self.lm_net.input_info[self.lm_input_blob].input_data.shape}" ) _, _, self.lm_h, self.lm_w = self.lm_net.input_info[ self.lm_input_blob].input_data.shape for o in self.lm_net.outputs.keys(): print(f"Output blob: {o} - shape: {self.lm_net.outputs[o].shape}") self.lm_score = "output_poseflag" self.lm_segmentation = "output_segmentation" self.lm_landmarks = "ld_3d" print("Loading landmark model to the plugin") self.lm_exec_net = self.ie.load_network(network=self.lm_net, num_requests=1, device_name=lm_device) self.lm_infer_time_cumul = 0 self.lm_infer_nb = 0 def pd_postprocess(self, inference): scores = np.squeeze(inference[self.pd_scores]) # 896 bboxes = inference[self.pd_bboxes][0] # 896x12 # Decode bboxes self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors, best_only=not self.multi_detection) # Non maximum suppression (not needed if best_only is True) if self.multi_detection: self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh) mpu.detections_to_rect(self.regions, kp_pair=[0, 1] if self.full_body else [2, 3]) mpu.rect_transformation(self.regions, self.frame_size, self.frame_size) def pd_render(self, frame): for r in self.regions: if self.show_pd_box: box = (np.array(r.pd_box) * self.frame_size).astype(int) cv2.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2) if self.show_pd_kps: # Key point 0 - mid hip center # Key point 1 - point that encodes size & rotation (for full body) # Key point 2 - mid shoulder center # Key point 3 - point that encodes size & rotation (for upper body) if self.full_body: # Only kp 0 and 1 used list_kps = [0, 1] else: # Only kp 2 and 3 used for upper body list_kps = [2, 3] for kp in list_kps: x = int(r.pd_kps[kp][0] * self.frame_size) y = int(r.pd_kps[kp][1] * self.frame_size) cv2.circle(frame, (x, y), 3, (0, 0, 255), -1) cv2.putText(frame, str(kp), (x, y + 12), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2) if self.show_scores and r.pd_score is not None: cv2.putText(frame, f"Pose score: {r.pd_score:.2f}", (50, self.frame_size // 2), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2) def lm_postprocess(self, region, inference): region.lm_score = np.squeeze(inference[self.lm_score]) if region.lm_score > self.lm_score_threshold: self.nb_active_regions += 1 lm_raw = inference[self.lm_landmarks].reshape(-1, 5) # Each keypoint have 5 information: # - X,Y coordinates are local to the region of # interest and range from [0.0, 255.0]. # - Z coordinate is measured in "image pixels" like # the X and Y coordinates and represents the # distance relative to the plane of the subject's # hips, which is the origin of the Z axis. Negative # values are between the hips and the camera; # positive values are behind the hips. Z coordinate # scale is similar with X, Y scales but has different # nature as obtained not via human annotation, by # fitting synthetic data (GHUM model) to the 2D # annotation. 
        # - Visibility, after user-applied sigmoid, denotes the probability
        #   that a keypoint is located within the frame and not occluded by
        #   another, bigger body part or another object.
        # - Presence, after user-applied sigmoid, denotes the probability
        #   that a keypoint is located within the frame.

        # Normalize x, y, z. Here self.lm_w == self.lm_h, so the scaling
        # in z = scaling in x = 1/self.lm_w
        lm_raw[:, :3] /= self.lm_w
        # Apply sigmoid on visibility and presence (if used later)
        # lm_raw[:, 3:5] = 1 / (1 + np.exp(-lm_raw[:, 3:5]))

        # region.landmarks contains the normalized 3D coordinates of the
        # landmarks in the relative oriented body bounding box
        region.landmarks = lm_raw[:, :3]
        # Calculate the landmark coordinates in the square padded image
        # (region.landmarks_padded)
        src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
        # region.rect_points[0] is the bottom-left point, and the points go clockwise!
        dst = np.array([(x, y) for x, y in region.rect_points[1:]], dtype=np.float32)
        mat = cv2.getAffineTransform(src, dst)
        lm_xy = np.expand_dims(region.landmarks[:self.nb_lms, :2], axis=0)
        lm_xy = np.squeeze(cv2.transform(lm_xy, mat))
        # A segment of length 1 in the coordinate system of the body bounding box
        # takes region.rect_w_a pixels in the original image. Then we arbitrarily
        # divide by 4 for a more realistic appearance.
        lm_z = region.landmarks[:self.nb_lms, 2:3] * region.rect_w_a / 4
        lm_xyz = np.hstack((lm_xy, lm_z))
        if self.smoothing:
            lm_xyz = self.filter.apply(lm_xyz)
        # np.int is deprecated (removed in recent NumPy); use np.int32
        region.landmarks_padded = lm_xyz.astype(np.int32)
        # If we added padding to make the image square, we need to remove this
        # padding from the landmark coordinates.
        # region.landmarks_abs contains the absolute landmark coordinates in the
        # original image (padding removed)
        region.landmarks_abs = region.landmarks_padded.copy()
        if self.pad_h > 0:
            region.landmarks_abs[:, 1] -= self.pad_h
        if self.pad_w > 0:
            region.landmarks_abs[:, 0] -= self.pad_w
        if self.use_gesture:
            self.recognize_gesture(region)
        if self.show_segmentation:
            self.seg = np.squeeze(inference[self.lm_segmentation])
            self.seg = 1 / (1 + np.exp(-self.seg))

    def lm_render(self, frame, region):
        if region.lm_score > self.lm_score_threshold:
            if self.show_segmentation:
                ret, mask = cv2.threshold(self.seg, 0.5, 1, cv2.THRESH_BINARY)
                mask = (mask * 255).astype(np.uint8)
                cv2.imshow("seg", self.seg)
                # cv2.imshow("mask", mask)
                # rect_points[0] is the bottom-left point!
                src = np.array([[0, 0], [128, 0], [128, 128]], dtype=np.float32)
                dst = np.array(region.rect_points[1:], dtype=np.float32)
                mat = cv2.getAffineTransform(src, dst)
                mask = cv2.warpAffine(mask, mat, (self.frame_size, self.frame_size))
                # cv2.imshow("mask2", mask)
                # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                l = frame.shape[0]
                frame2 = cv2.bitwise_and(frame, frame, mask=mask)
                if not self.crop:
                    frame2 = frame2[self.pad_h:l - self.pad_h, self.pad_w:l - self.pad_w]
                cv2.imshow("Segmentation", frame2)
            if self.show_rot_rect:
                cv2.polylines(frame, [np.array(region.rect_points)], True, (0, 255, 255), 2, cv2.LINE_AA)
            if self.show_landmarks:
                list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY
                lines = [np.array([region.landmarks_padded[point, :2] for point in line])
                         for line in list_connections]
                cv2.polylines(frame, lines, False, (255, 180, 90), 2, cv2.LINE_AA)
                for i, x_y in enumerate(region.landmarks_padded[:self.nb_lms - 2, :2]):
                    if i > 10:
                        color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255)
                    elif i == 0:
                        color = (0, 255, 255)
                    elif i in [4, 5, 6, 8, 10]:
                        color = (0, 255, 0)
                    else:
                        color = (0, 0, 255)
                    # Thickness -1 draws a filled circle (-11 was a typo)
                    cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -1)
            if self.show_3d:
                points = region.landmarks_abs
                lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY
                colors = COLORS_FULL_BODY
                for i, a_b in enumerate(lines):
                    a, b = a_b
                    line = create_segment(points[a], points[b], radius=5, color=colors[i])
                    if line:
                        self.vis3d.add_geometry(line, reset_bounding_box=False)
            if self.show_scores:
                cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}",
                            (region.landmarks_padded[24, 0] - 10, region.landmarks_padded[24, 1] + 90),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
            if self.use_gesture and self.show_gesture:
                # (0, 190, 255) is a valid BGR color; the original (0, 1190, 255)
                # had an out-of-range channel value
                cv2.putText(frame, region.gesture,
                            (region.landmarks_padded[6, 0] - 10, region.landmarks_padded[6, 1] - 50),
                            cv2.FONT_HERSHEY_PLAIN, 5, (0, 190, 255), 3)

    def recognize_gesture(self, r):

        def angle_with_y(v):
            # v: 2D vector (x, y)
            # Returns the angle in degrees between v and the y-axis of the image plane
            if v[1] == 0:
                return 90
            angle = atan2(v[0], v[1])
            return np.degrees(angle)

        # For the demo, we want to recognize the flag semaphore alphabet.
        # For this task, we just need to measure the angle of each arm with the vertical.
        right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] - r.landmarks_abs[12, :2])
        left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] - r.landmarks_abs[11, :2])
        right_pose = int((right_arm_angle + 202.5) / 45)
        left_pose = int((left_arm_angle + 202.5) / 45)
        r.gesture = semaphore_flag.get((right_pose, left_pose), None)

    def run(self):
        self.fps = FPS(mean_nb_frames=20)
        nb_frames = 0
        nb_pd_inferences = 0
        nb_pd_inferences_direct = 0
        nb_lm_inferences = 0
        nb_lm_inferences_after_landmarks_ROI = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        get_new_frame = True
        use_previous_landmarks = False
        global_time = time.perf_counter()
        while True:
            if get_new_frame:
                nb_frames += 1
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Crop the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size]
                else:
                    # Pad the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h,
                                                     self.pad_w, self.pad_w, cv2.BORDER_CONSTANT)
            annotated_frame = video_frame.copy()

            if not self.force_detection and use_previous_landmarks:
                self.regions = regions_from_landmarks
                # self.regions.pd_kps are initialized from the landmarks of the previous frame
                mpu.detections_to_rect(self.regions, kp_pair=[0, 1])
                mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)
            else:
                # Infer pose detection
                # Resize the image to the NN square input shape
                frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h), interpolation=cv2.INTER_AREA)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None,]
                pd_rtrip_time = now()
                inference = self.pd_exec_net.infer(inputs={self.pd_input_blob: frame_nn})
                glob_pd_rtrip_time += now() - pd_rtrip_time
                self.pd_postprocess(inference)
                self.pd_render(annotated_frame)
                nb_pd_inferences += 1
                if get_new_frame:
                    nb_pd_inferences_direct += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False)
            if self.force_detection:
                for r in self.regions:
                    frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h)
                    # Transpose hxwx3 -> 1x3xhxw
                    frame_nn = np.transpose(frame_nn, (2, 0, 1))[None,]
                    # Get landmarks
                    lm_rtrip_time = now()
                    inference = self.lm_exec_net.infer(inputs={self.lm_input_blob: frame_nn})
                    glob_lm_rtrip_time += now() - lm_rtrip_time
                    nb_lm_inferences += 1
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
            elif len(self.regions) == 1:
                r = self.regions[0]
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None,]
                # Get landmarks
                lm_rtrip_time = now()
                inference = self.lm_exec_net.infer(inputs={self.lm_input_blob: frame_nn})
                glob_lm_rtrip_time += now() - lm_rtrip_time
                nb_lm_inferences += 1
                if use_previous_landmarks:
                    nb_lm_inferences_after_landmarks_ROI += 1

                self.lm_postprocess(r, inference)
                if not self.force_detection:
                    if get_new_frame:
                        if not use_previous_landmarks:
                            # With a new frame, we have run the landmark NN on a ROI found by the detection NN...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks.
                                # Predict the ROI for the next frame from the normalized (x, y)
                                # coordinates of the last 2 landmarks
                                regions_from_landmarks = [mpu.Region(
                                    pd_kps=r.landmarks_padded[self.nb_lms - 2:self.nb_lms, :2] / self.frame_size)]
                                use_previous_landmarks = True
                        else:
                            # With a new frame, we have run the landmark NN on a ROI calculated
                            # from the landmarks of the previous frame...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks.
                                # Predict the ROI for the next frame from the normalized (x, y)
                                # coordinates of the last 2 landmarks
                                regions_from_landmarks = [mpu.Region(
                                    pd_kps=r.landmarks_padded[self.nb_lms - 2:self.nb_lms, :2] / self.frame_size)]
                                use_previous_landmarks = True
                            else:
                                # ...and could not find a body.
                                # We don't know whether it is because the ROI calculated from the
                                # previous frame is not reliable (the body moved) or because there
                                # is really no body in the frame. To decide, we have to run the
                                # detection NN on this frame.
                                get_new_frame = False
                                use_previous_landmarks = False
                                continue
                    else:
                        # On a frame on which we already ran the landmark NN without finding
                        # a body, we have run the detection NN...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and successfully found a body and its landmarks.
                            # Predict the ROI for the next frame from the normalized (x, y)
                            # coordinates of the last 2 landmarks
                            regions_from_landmarks = [mpu.Region(
                                pd_kps=r.landmarks_padded[self.nb_lms - 2:self.nb_lms, :2] / self.frame_size)]
                            use_previous_landmarks = True
                        # else:
                        #     ...and could not find a body.
                        #     We are sure there is no body in that frame.
                        get_new_frame = True
                self.lm_render(annotated_frame, r)
            else:
                # The detection NN hasn't found any body
                get_new_frame = True

            self.fps.update()

            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps
            elif key == ord('s'):
                self.show_segmentation = not self.show_segmentation

        # Print some stats
        print(f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})")
        print(f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - "
              f"# after landmarks ROI failures: {nb_pd_inferences - nb_pd_inferences_direct}")
        print(f"# landmark inferences       : {nb_lm_inferences} - "
              f"# after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - "
              f"# after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}")
        print(f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
        if nb_lm_inferences:
            print(f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")

        if self.output:
            self.output.release()
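# Illustrative sketch only: `semaphore_flag`, used by recognize_gesture() above,
# is defined elsewhere in the demo. Its keys are (right_pose, left_pose) pairs,
# i.e. each arm angle quantized into 45-degree sectors, and its values are the
# decoded letters. The entries below are placeholders, not the real mapping:
#
#   semaphore_flag = {
#       (3, 4): 'A',   # hypothetical entry
#       (2, 4): 'B',   # hypothetical entry
#       ...
#   }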
    def load_model(self, xml_file, bin_file, device="MYRIAD", cpu_extension=None, precision='FP16'):
        # N : number of images in the batch
        # C : channels
        # H : height
        # W : width
        # Input => HWC
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            self.reset_engine()
            p_model = Path(xml_file).resolve()
            self.name = str(Path(p_model.name).stem)
            logging.info('==================================================================')
            logging.info('Loading Model')
            logging.info('    Name      : {}'.format(self.name))
            logging.info('    Target    : {}'.format(device))
            logging.info('    Model     : {}'.format(xml_file))
            logging.info('    Precision : {}'.format(precision))
            version_data = self.ie.get_versions(device)
            self.ver_major = int(version_data[device].major)
            self.ver_minor = int(version_data[device].minor)
            self.ver_build = int(version_data[device].build_number)
            # self.plugin = IEPlugin(device=device)
            # if 'MYRIAD' in device:
            #     # https://docs.openvinotoolkit.org/latest/_docs_IE_DG_supported_plugins_MYRIAD.html
            #     self.plugin.set_config({"VPU_FORCE_RESET": "NO"})
            if self.ie:
                del self.ie
            self.ie = IECore()
            if self.ver_major >= 2 and self.ver_minor >= 1 and self.ver_build >= 42025:
                self.ieNet = self.ie.read_network(model=xml_file, weights=bin_file)
            else:
                self.ieNet = IENetwork(model=xml_file, weights=bin_file)

            # Process inputs
            # image_tensor : TensorFlow
            # data         : Caffe
            if len(self.ieNet.inputs) > 2:
                logging.warning('!! Too many inputs. Not supported')
                return Model_Flag.LoadError

            # Don't touch layers here. Somehow touching a layer before loading
            # causes a load failure with Myriad.

            logging.info('==================================================================')
            logging.info('Output Blobs')
            for key, blob in self.ieNet.outputs.items():
                logging.info('Output Key : {}'.format(key))
                logging.info('    Layout    : {}'.format(blob.layout))
                logging.info('    Shape     : {}'.format(blob.shape))
                logging.info('    Precision : {}'.format(blob.precision))
                # blob.precision = precision

            logging.info('==================================================================')
            logging.info('Input Blobs')
            for key, blob in self.ieNet.inputs.items():
                logging.info('Input Key : {}'.format(key))
                logging.info('    Layout    : {}'.format(blob.layout))
                logging.info('    Shape     : {}'.format(blob.shape))
                logging.info('    Precision : {}'.format(blob.precision))
                # blob.precision = precision

            logging.info('>> Loading model to {}'.format(device))
            self.exec_net = self.ie.load_network(network=self.ieNet, device_name=device, num_requests=2)
            logging.info('<< Model loaded to {}'.format(device))

            # Touch layers only after the model is loaded
            self.output_blob_key = next(iter(self.ieNet.outputs))
            for key, blob in self.ieNet.outputs.items():
                layer = self.ieNet.layers[key]
                if layer.type == 'DetectionOutput':
                    outputFormat = Output_Format.DetectionOutput
                elif layer.type == 'RegionYolo':
                    outputFormat = Output_Format.RegionYolo
                elif layer.type == 'Convolution':
                    if layer.name == 'Mconv7_stage2_L1' or layer.name == 'Mconv7_stage2_L2':
                        outputFormat = Output_Format.HumanPose
                else:
                    return Model_Flag.Unsupported

            if outputFormat == Output_Format.DetectionOutput:
                if len(self.ieNet.inputs) == 1 and len(self.ieNet.outputs) == 1:
                    # 1 input, 1 output
                    input_key = next(iter(self.ieNet.inputs))
                    output_key = next(iter(self.ieNet.outputs))
                    layer = self.ieNet.layers[output_key]
                    if layer.type == 'DetectionOutput':
                        outputFormat = Output_Format.DetectionOutput
                    else:
                        return Model_Flag.Unsupported
                    if input_key == 'image_tensor':
                        self.inputFormat = Input_Format.Tensorflow
                    elif input_key == 'image':
                        self.inputFormat = Input_Format.IntelIR
                    elif input_key == 'data':
                        self.inputFormat = Input_Format.Caffe
                    else:
                        self.inputFormat = Input_Format.Other
                    params = self.ieNet.layers[output_key].params
                    input_blob = self.ieNet.inputs[input_key]
                    self.result_processor = Object_Detection_Processor(
                        model_name=self.name,
                        input_format=self.inputFormat,
                        input_key=input_key,
                        input_shape=input_blob.shape,
                        input_layout=input_blob.layout,
                        output_format=outputFormat,
                        output_key=output_key,
                        output_params=params)
                elif len(self.ieNet.inputs) == 2 and len(self.ieNet.outputs) == 1:
                    # 2 inputs and 1 output: Faster R-CNN
                    output_key = next(iter(self.ieNet.outputs))
                    layer = self.ieNet.layers[output_key]
                    if layer.type != 'DetectionOutput':
                        return Model_Flag.Unsupported
                    info_key = ""
                    data_key = ""
                    for key, blob in self.ieNet.inputs.items():
                        if key == 'image_info':
                            info_key = key
                        elif key == 'image_tensor':
                            data_key = key
                    if len(info_key) > 0 and len(data_key) > 0:
                        self.inputFormat = Input_Format.Faster_RCNN
                        input_blob = self.ieNet.inputs[data_key]
                        params = self.ieNet.layers[output_key].params
                        self.result_processor = Object_Detection_RCNN_Processor(
                            model_name=self.name,
                            input_format=self.inputFormat,
                            info_key=info_key,
                            data_key=data_key,
                            data_shape=input_blob.shape,
                            data_layout=input_blob.layout,
                            output_format=Output_Format.DetectionOutput,
                            output_key=output_key,
                            output_params=params)
                    else:
                        return Model_Flag.Unsupported
            elif outputFormat == Output_Format.RegionYolo:
                input_key = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.Yolo
                self.result_processor = Object_Detection_Yolo_Processor(
                    model_name=self.name,
                    input_format=self.inputFormat,
                    input_key=input_key,
                    input_shape=input_blob.shape,
                    input_layout=input_blob.layout,
                    output_format=Output_Format.RegionYolo)
                for key, blob in self.ieNet.outputs.items():
                    self.result_processor.reshape_data[key] = \
                        self.ieNet.layers[self.ieNet.layers[key].parents[0]].shape
                    self.result_processor.set_class_label(self.ieNet.layers[key].params)
                # for key, blob in self.result_processor.reshape_data.items():
                #     print('{} {}'.format(key, blob))
            elif outputFormat == Output_Format.HumanPose:
                input_key = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.HumanPose
                self.result_processor = Human_Pose_Processor(
                    model_name=self.name,
                    input_format=Input_Format.HumanPose,
                    input_shape=input_blob.shape,
                    input_layout=input_blob.layout)
            return Model_Flag.Loaded
        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(
                self.__class__.__name__, sys._getframe().f_code.co_name, ex))
            return Model_Flag.LoadError
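# Minimal usage sketch for the loader above (the wrapper class name and file
# paths are hypothetical; Model_Flag comes from this module):
#
#   engine = InferenceModel()
#   flag = engine.load_model('model.xml', 'model.bin', device='MYRIAD')
#   if flag != Model_Flag.Loaded:
#       print('Load failed:', flag)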
class Network:
    """
    Loads and configures inference plugins for the specified target devices,
    and performs synchronous and asynchronous inference for the specified
    infer requests.
    """

    def __init__(self):
        ### TODO: Initialize any class variables desired ###
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.out_blob = None
        self.net_plugin = None
        self.infer_request_handle = None

    def load_model(self, model, device, input_size, output_size, num_requests,
                   cpu_extension=None, plugin=None):
        ### TODO: Load the model ###
        model_xml = model
        # Get the IR binary (weights) file
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        # Plugin initialization for the specified device,
        # and load the extension library if specified
        if not plugin:
            log.info("Initializing plugin for {} device...".format(device))
            # Load the Inference Engine API, named here as plugin
            self.plugin = IECore()
        else:
            self.plugin = plugin
        # Read the IR: load the model architecture (XML) and weights (binary)
        log.info("Reading IR...")
        self.net = IENetwork(model=model_xml, weights=model_bin)
        log.info("Loading IR to the plugin...")
        ### TODO: Add any necessary extensions ###
        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)
        # Get the layers of the network supported by the plugin
        supported_layers = self.plugin.query_network(network=self.net, device_name="CPU")
        ### TODO: Check for supported layers ###
        # Check for any unsupported layers, and let the user know if anything
        # is missing. Exit the program, if so.
        unsupported_layers = [l for l in self.net.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Check whether extensions are available to add to IECore.")
            exit(1)
        ### TODO: Return the loaded inference plugin ###
        # Load the network read from the IR into the plugin (Inference Engine)
        if num_requests == 0:
            self.net_plugin = self.plugin.load_network(self.net, device)
        else:
            self.net_plugin = self.plugin.load_network(self.net, device, num_requests=num_requests)
        # Get the input and output layers
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))
        # The expected sizes are the asserted ones (the original formatted the
        # actual count into the message instead)
        assert len(self.net.inputs.keys()) == input_size, \
            "Supports only {} input topologies".format(input_size)
        assert len(self.net.outputs) == output_size, \
            "Supports only {} output topologies".format(output_size)
        ### Note: You may need to update the function parameters. ###
        return self.plugin, self.get_input_shape()

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###
        return self.net.inputs[self.input_blob].shape

    def exec_net(self, request_id, frame):
        ### TODO: Start an asynchronous request ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        self.infer_request_handle = self.net_plugin.start_async(
            request_id=request_id, inputs={self.input_blob: frame})
        return self.net_plugin

    def wait(self, request_id):
        ### TODO: Wait for the request to be complete. ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        # status = self.exec_network.requests[0].wait(-1)
        status = self.net_plugin.requests[request_id].wait(-1)
        return status

    def get_output(self, request_id, output=None):
        ### TODO: Extract and return the output results ###
        ### Note: You may need to update the function parameters. ###
        # out = self.infer_request_handle.outputs[self.output_blob]
        if output:
            res = self.infer_request_handle.outputs[output]
        else:
            res = self.net_plugin.requests[request_id].outputs[self.out_blob]
        return res

    def clean(self):
        """
        Deletes all the instances
        :return: None
        """
        del self.net_plugin
        del self.plugin
        del self.net

    def performance_counter(self, request_id):
        """
        Queries performance measures per layer to get feedback on which is
        the most time-consuming layer.
        :param request_id: Index of the infer request. Limited to device capabilities.
        :return: Performance counters for the layers
        """
        perf_count = self.net_plugin.requests[request_id].get_perf_counts()
        return perf_count
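# Minimal usage sketch of the Network wrapper above (the model path is
# illustrative; assumes numpy is imported as np):
net_mgr = Network()
plugin, input_shape = net_mgr.load_model("model.xml", "CPU", 1, 1, num_requests=2)
frame = np.zeros(input_shape, dtype=np.float32)  # placeholder input blob
net_mgr.exec_net(request_id=0, frame=frame)
if net_mgr.wait(request_id=0) == 0:
    result = net_mgr.get_output(request_id=0)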
import streamlit as st
from openvino.inference_engine import IECore

st.title("hello!")
ie = IECore()
model_path = "model/model.xml"
ie_net = ie.read_network(model=model_path, weights=model_path.replace("xml", "bin"))
exec_net = ie.load_network(network=ie_net, num_requests=1, device_name="CPU")
st.markdown(exec_net)
from openvino.inference_engine import IENetwork, IECore
import numpy as np
import time

# Getting the model bin and xml files
model_path = 'pool_cnn/pool_cnn'
model_weights = model_path + '.bin'
model_structure = model_path + '.xml'

# Load the model: use either the IECore or IEPlugin API
core = IECore()
model = IENetwork(model_structure, model_weights)
net = core.load_network(network=model, device_name='CPU')
input_name = next(iter(net.inputs))

# Reading and preprocessing the image
input_img = np.load('image.npy')
input_img = input_img.reshape(1, 28, 28)
input_dict = {input_name: input_img}

# Using the input image, run inference on the model for 10 iterations
start = time.time()
for _ in range(10):
    net.infer(input_dict)

# Report the total time taken by the 10 inference iterations
# (one way to finish the print statement the original left as a TODO)
print("Time taken to run 10 iterations: {:.4f} seconds".format(time.time() - start))
import time
import numpy as np
from openvino.inference_engine import IECore

def main(args):
    # Search for available NCS2 (MYRIAD) devices on the system
    MYRIADs = []
    ie = IECore()
    for device in ie.available_devices:
        if 'MYRIAD' in device:
            MYRIADs.append(device)
    num_devices = len(MYRIADs)
    print('{} MYRIAD devices found. {}'.format(len(MYRIADs), MYRIADs))
    if num_devices == 0:
        return

    model = 'public/googlenet-v1/FP16/googlenet-v1'
    net = ie.read_network(model + '.xml', model + '.bin')

    # Build up the device descriptor
    if num_devices == 1:
        device = 'MYRIAD'
    else:
        device = 'MULTI'
        for i, MYRIAD in enumerate(MYRIADs):
            device += ',' if i != 0 else ':'
            device += MYRIAD
    print('Device name : {}'.format(device))

    inblob = list(net.input_info.keys())[0]
    inshape = net.input_info[inblob].tensor_desc.dims
    outblob = list(net.outputs.keys())[0]
    outshape = net.outputs[outblob].shape

    config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}  # default = 'YES'
    num_requests = 4 * num_devices
    execnet = ie.load_network(net, device, config=config, num_requests=num_requests)

    dummy = np.random.rand(1, 3, 224, 224)
    niter = 100
    print('Start inferencing ({} times, {})'.format(niter, 'SYNC' if args.sync else 'ASYNC'))
    start = time.monotonic()
    for i in range(niter):
        if args.sync:
            execnet.infer(inputs={inblob: dummy})  # Synchronous inference
        else:
            reqId = -1
            while reqId == -1:
                reqId = execnet.get_idle_request_id()
            execnet.requests[reqId].async_infer(inputs={inblob: dummy})  # Asynchronous inference
    if not args.sync:
        # Wait for all requests to complete
        for i in range(num_requests):
            execnet.requests[i].wait()
    end = time.monotonic()
    print('Performance = {} FPS'.format(niter / (end - start)))
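# Illustration of the descriptor built above: with one stick the device name
# is simply 'MYRIAD'; with several sticks the MULTI plugin is addressed as
# 'MULTI:<dev1>,<dev2>,...'. For example (the device IDs are illustrative of
# how the Inference Engine enumerates NCS2 sticks):
#
#   'MULTI:MYRIAD.1.2-ma2480,MYRIAD.1.4-ma2480'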
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''

    def __init__(self, model_name, device, threshold=0.60):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError("Could not initialise the network. "
                             "Have you entered the correct model path?")
        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        '''
        Loading the network into the core
        '''
        self.plugin = IECore()
        self.exec_network = self.plugin.load_network(network=self.model, device_name=self.device)

    def predict(self, image):
        '''
        Detecting people in an image
        '''
        preprocessed_input = self.preprocess_input(image)
        self.exec_network.infer({self.input_name: preprocessed_input})
        result = self.exec_network.requests[0]
        coords = self.preprocess_outputs(result.outputs['detection_out'])
        height, width = image.shape[:2]
        for coord in coords:
            coord[0] = coord[0] * width
            coord[1] = coord[1] * height
            coord[2] = coord[2] * width
            coord[3] = coord[3] * height
        preprocessed_image = self.draw_outputs(coords, image)
        return coords, preprocessed_image

    def draw_outputs(self, coords, image):
        '''
        Drawing rectangles around detected people
        '''
        for coord in coords:
            (startX, startY, endX, endY) = coord
            # cv2.rectangle expects integer pixel coordinates; the scaled
            # values above are floats, so cast them here
            cv2.rectangle(image, (int(startX), int(startY)), (int(endX), int(endY)), (255, 0, 0), 2)
        cv2.rectangle(image, (620, 1), (915, 562), (0, 0, 0), 5)
        return image

    def preprocess_outputs(self, outputs):
        '''
        Processing the output to get the bounding boxes above the required threshold
        '''
        coords = []
        for i in np.arange(0, outputs.shape[2]):
            confidence = outputs[0, 0, i, 2]
            if confidence > self.threshold:
                box = outputs[0, 0, i, 3:7]
                coords.append(box)
        return coords

    def preprocess_input(self, image):
        '''
        Preprocessing the input to fit the inference engine
        '''
        b, c, h, w = self.input_shape
        prepo = np.copy(image)
        prepo = cv2.resize(prepo, (w, h))
        prepo = prepo.transpose((2, 0, 1))
        prepo = prepo.reshape(1, c, h, w)
        return prepo
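# Minimal usage sketch of PersonDetect (the model path and image file are
# illustrative; the class expects the path without the .xml/.bin extension):
pd = PersonDetect("intel/person-detection-retail-0013/FP16/person-detection-retail-0013", "CPU")
pd.load_model()
frame = cv2.imread("frame.jpg")  # any BGR image
coords, annotated = pd.predict(frame)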
class FaceDetectionModel:
    '''
    Class for the Face Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split('.')[0] + '.bin'
        self.core = None
        self.network = None
        self.exec_net = None
        self.input = None
        self.output = None
        self.mode = 'async'
        self.request_id = 0
        self.num_requests = 1  # was missing, although load_model references it

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any plugins, this is where you can load them.
        '''
        self.core = IECore()
        self.network = self.core.read_network(model=self.model_structure, weights=self.model_weights)
        self.exec_net = self.core.load_network(network=self.network,
                                               device_name=self.device,
                                               num_requests=self.num_requests)
        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))
        return self.exec_net

    def predict(self, image, prob_threshold):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        processed_frame = self.preprocess_input(image)
        self.exec_net.start_async(request_id=self.request_id,
                                  inputs={self.input: processed_frame})
        if self.mode == 'async':
            self.exec_net.requests[self.request_id].wait()
            result = self.exec_net.requests[self.request_id].outputs[self.output]
            croppedFace, box = self.preprocess_output(result[0][0], image, prob_threshold)
            return croppedFace, box
        else:
            if self.exec_net.requests[self.request_id].wait(-1) == 0:
                result = self.exec_net.requests[self.request_id].outputs[self.output]
                croppedFace, box = self.preprocess_output(result[0][0], image, prob_threshold)
                return croppedFace, box

    def check_model(self):
        supported_layers = self.core.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.core.add_extension(self.extensions, self.device)
                supported_layers = self.core.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("After adding the extension, unsupported layers were still found")
                    exit(1)
                print("After adding the extension, the issue is resolved")
            else:
                print("Give the path of the CPU extension")
                exit(1)

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        model_input_shape = self.network.inputs[self.input].shape
        image_resized = cv2.resize(image, (model_input_shape[3], model_input_shape[2]))
        p_frame = np.transpose(np.expand_dims(image_resized, axis=0), (0, 3, 1, 2))
        return p_frame

    def preprocess_output(self, outputs, image, prob_threshold):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        coords = []
        for res in outputs:
            conf = res[2]
            if conf > prob_threshold:
                x_min = res[3]
                y_min = res[4]
                x_max = res[5]
                y_max = res[6]
                coords.append([x_min, y_min, x_max, y_max])
        # The original signature and body did not match the caller in predict():
        # it received (detections, image, threshold) and had to return
        # (croppedFace, box). Scale the first detection back to pixel
        # coordinates and crop the face accordingly.
        croppedFace, box = None, None
        if coords:
            h, w = image.shape[:2]
            box = [int(coords[0][0] * w), int(coords[0][1] * h),
                   int(coords[0][2] * w), int(coords[0][3] * h)]
            croppedFace = image[box[1]:box[3], box[0]:box[2]]
        return croppedFace, box
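# Minimal usage sketch of FaceDetectionModel (the model path and image file
# are illustrative):
fd = FaceDetectionModel("intel/face-detection-adas-0001/FP16/face-detection-adas-0001.xml")
fd.load_model()
face, box = fd.predict(cv2.imread("frame.jpg"), prob_threshold=0.6)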
class Network:
    '''
    Load and store information for working with the Inference Engine,
    and any loaded models.
    '''

    def __init__(self):
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        '''
        Load the model given IR files.
        Defaults to CPU as the device, for use in the workspace.
        Synchronous requests made within.
        '''
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        # Initialize the plugin
        self.plugin = IECore()
        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)
        # Read the IR as an IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)
        # Load the IENetwork into the plugin
        self.exec_network = self.plugin.load_network(network=self.network,
                                                     num_requests=2,
                                                     device_name=device)
        # Get the input and output layers
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def check_device_extension(self, log, device="CPU"):
        if "CPU" in device:
            supported_layers = self.plugin.query_network(self.network, "CPU")
            not_supported_layers = [l for l in self.network.layers.keys()
                                    if l not in supported_layers]
            if len(not_supported_layers) != 0:
                # Use the `device` parameter; the original referenced an
                # out-of-scope `args.device` here
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                          .format(device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in sample's command line "
                          "parameters using -l or --cpu_extension command line argument")
                sys.exit(1)
        assert len(self.network.inputs.keys()) == 1, \
            "Sample supports only YOLO V3 based single input topologies"
        return

    def get_input_shape(self):
        '''
        Gets the input shape of the network
        '''
        return self.network.inputs[self.input_blob].shape

    def async_inference(self, image, request_id=0):
        '''
        Makes an asynchronous inference request, given an input image.
        '''
        ### TODO: Start asynchronous inference
        self.infer_request = self.exec_network.start_async(
            request_id=request_id, inputs={self.input_blob: image})
        return

    def wait(self):
        '''
        Checks the status of the inference request.
        '''
        ### TODO: Wait for the async request to be complete
        status = self.exec_network.requests[0].wait(-1)
        return status

    def extract_output(self, request_id=0):
        '''
        Returns a list of the results for the output layer of the network.
        '''
        ### TODO: Return the outputs of the network from the output_blob
        return self.exec_network.requests[request_id].outputs
def test_add_outputs_with_and_without_port():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.add_outputs('28/Reshape')
    net.add_outputs([('29/WithoutBiases', 0)])
    assert sorted(net.outputs) == ['28/Reshape', '29/WithoutBiases', 'fc_out']
(W, H) = (None, None)
(newW, newH) = (args["width"], args["height"])
(rW, rH) = (None, None)
mean = np.array([123.68, 116.779, 103.939][::-1], dtype="float16")

# Define the two output layer names for the EAST detector model that we are
# interested in -- the first is the output probabilities and the second can
# be used to derive the bounding box coordinates of text.

# Load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
model_xml = args["east"]
model_bin = os.path.splitext(model_xml)[0] + ".bin"
ie = IECore()
net = ie.read_network(model_xml, model_bin)
input_info = net.input_info
input_blob = next(iter(input_info))
exec_net = ie.load_network(network=net, device_name=args["device"])

print("[INFO] loading OCR model...")
model_rec_xml = args["rec"]
model_rec_bin = os.path.splitext(model_rec_xml)[0] + ".bin"
# Reads the network from the .xml and .bin files
net_rec = ie.read_network(model_rec_xml, model_rec_bin)
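# Hedged sketch of how the `mean` computed above would typically be applied
# when building the EAST input blob. The actual preprocessing lives in the
# omitted part of this script; `frame` and the cv2 import are assumptions:
blob = cv2.resize(frame, (newW, newH)).astype(np.float32)
blob -= mean                                # per-channel mean subtraction (BGR order)
blob = blob.transpose(2, 0, 1)[None, ...]   # HWC -> NCHW for the Inference Engine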
def test_batch_size_setter():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.batch_size = 4
    assert net.batch_size == 4
    assert net.input_info['data'].input_data.shape == [4, 3, 32, 32]
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = IECore()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(ie, args.model_od, device=args.device, label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(ie, args.model_hpe, device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(args.model_hpe, args.device))

    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    video_writer = cv2.VideoWriter()
    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        presenter.drawGraphs(frame)

        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
def test_reshape():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.reshape({"data": (2, 3, 32, 32)})
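# A reasonable follow-up check (mirroring the batch-size test above): after
# the reshape, the input shape reported by the network should match the
# requested dimensions.
def test_reshape_shape_is_applied():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.reshape({"data": (2, 3, 32, 32)})
    assert net.input_info['data'].input_data.shape == [2, 3, 32, 32]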
# Workaround for a reshaping bug
c1 = net.layers['79/Cast_11815_const']
c1.blobs['custom'][4] = inp_h
c1.blobs['custom'][5] = inp_w

c2 = net.layers['86/Cast_11811_const']
c2.blobs['custom'][2] = out_h
c2.blobs['custom'][3] = out_w

# Reshape the network to the specific size
net.reshape({'0': [1, 3, inp_h, inp_w], '1': [1, 3, out_h, out_w]})

# Load the network to the device
ie = IECore()
exec_net = ie.load_network(net, 'CPU')

# Prepare the input
inp = img.transpose(2, 0, 1)  # interleaved to planar (HWC -> CHW)
inp = inp.reshape(1, 3, inp_h, inp_w)
inp = inp.astype(np.float32)

# Prepare the second input - a bicubic resize of the first input
resized_img = cv.resize(img, (out_w, out_h), interpolation=cv.INTER_CUBIC)
resized = resized_img.transpose(2, 0, 1)
resized = resized.reshape(1, 3, out_h, out_w)
resized = resized.astype(np.float32)

outs = exec_net.infer({'0': inp, '1': resized})