def infer_data(data: dict, exec_net: ExecutableNetwork,
               input_blobs: list, output_blobs: list) -> dict:
    """Do a synchronous matrix inference, batch by batch."""
    matrix_shape = next(iter(data.values())).shape
    result = {}

    # Allocate one output matrix per output blob.
    # Assumes every output blob shares the same batch size; the last
    # batch_size read here is reused for the slicing loop below.
    for blob_name in output_blobs:
        shape = exec_net.outputs[blob_name].shape
        batch_size = shape[0]
        result[blob_name] = np.ndarray((matrix_shape[0], shape[-1]))

    slice_begin = 0
    slice_end = batch_size

    while slice_begin < matrix_shape[0]:
        vectors = {blob_name: data[blob_name][slice_begin:slice_end] for blob_name in input_blobs}
        num_of_vectors = next(iter(vectors.values())).shape[0]

        # Zero-pad the final slice if it is smaller than the batch size.
        if num_of_vectors < batch_size:
            temp = {blob_name: np.zeros((batch_size, vectors[blob_name].shape[1]))
                    for blob_name in input_blobs}
            for blob_name in input_blobs:
                temp[blob_name][:num_of_vectors] = vectors[blob_name]
            vectors = temp

        vector_results = exec_net.infer(vectors)

        for blob_name in output_blobs:
            result[blob_name][slice_begin:slice_end] = vector_results[blob_name][:num_of_vectors]

        slice_begin += batch_size
        slice_end += batch_size

    return result
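# A minimal usage sketch for infer_data. The blob names "input"/"output",
# the 64-wide feature matrix, and this helper itself are illustrative
# assumptions, not part of the original sample.
def _example_infer_data(exec_net: ExecutableNetwork) -> dict:
    feats = np.random.rand(100, 64).astype(np.float32)  # 100 feature vectors
    return infer_data({"input": feats}, exec_net, ["input"], ["output"])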
def infer_async_thread_proc(
        net, exec_net: ExecutableNetwork, dev_thread_request_id: int,
        image_list: list, first_image_index: int, last_image_index: int,
        num_total_inferences: int, result_list: list, result_index: int,
        start_barrier: threading.Barrier, end_barrier: threading.Barrier,
        simultaneous_infer_per_thread: int, infer_result_queue: queue.Queue,
        input_blob, output_blob):

    # Sync with the main start barrier
    start_barrier.wait()

    # Start times for the fps counter
    start_time = time.time()
    end_time = start_time

    handle_list = [None] * simultaneous_infer_per_thread
    image_index = first_image_index
    image_result_start_index = 0

    inferences_per_req = int(num_total_inferences / simultaneous_infer_per_thread)

    # Each thread cycles through its pool of simultaneous_infer_per_thread
    # async inference requests.
    for outer_index in range(0, inferences_per_req):

        # Start the simultaneous async inferences
        for infer_id in range(0, simultaneous_infer_per_thread):
            new_request_id = dev_thread_request_id + infer_id
            handle_list[infer_id] = exec_net.start_async(
                request_id=new_request_id,
                inputs={input_blob: image_list[image_index]})
            image_index += 1
            if image_index > last_image_index:
                image_index = first_image_index

        # Wait for the simultaneous async inferences to finish.
        for wait_index in range(0, simultaneous_infer_per_thread):
            infer_status = handle_list[wait_index].wait()  # blocks until the request completes
            result = handle_list[wait_index].outputs[output_blob]
            top_index = numpy.argsort(result, axis=1)[0, -1:][::-1]
            top_index = top_index[0]
            prob = result[0][top_index]
            infer_result_queue.put((top_index, prob))
            handle_list[wait_index] = None

    # Save the time spent on inferences within this inference thread and associated reader thread
    end_time = time.time()
    total_inference_time = end_time - start_time
    result_list[result_index] = total_inference_time

    print("Thread " + str(result_index) + " end barrier reached")

    # Wait for all inference threads to finish
    end_barrier.wait()
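# A minimal launch sketch for infer_async_thread_proc. The thread/request
# counts and the even split of image_list are illustrative assumptions; the
# exec_net is assumed to have been loaded with
# num_requests >= num_threads * simultaneous_infer_per_thread.
def _example_launch_threads(net, exec_net, image_list, input_blob, output_blob,
                            num_threads=2, simultaneous_infer_per_thread=4):
    results = [None] * num_threads
    out_queue = queue.Queue()
    # main thread plus each worker waits on both barriers
    start_barrier = threading.Barrier(num_threads + 1)
    end_barrier = threading.Barrier(num_threads + 1)
    per_thread = len(image_list) // num_threads
    threads = []
    for t in range(num_threads):
        threads.append(threading.Thread(
            target=infer_async_thread_proc,
            args=(net, exec_net, t * simultaneous_infer_per_thread, image_list,
                  t * per_thread, (t + 1) * per_thread - 1,
                  len(image_list), results, t,
                  start_barrier, end_barrier,
                  simultaneous_infer_per_thread, out_queue,
                  input_blob, output_blob)))
        threads[-1].start()
    start_barrier.wait()  # release all worker threads together
    end_barrier.wait()    # wait until every worker has recorded its timing
    for t in threads:
        t.join()
    return results, out_queue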
def infer_data(
    data: dict,
    exec_net: ExecutableNetwork,
    input_blobs: list,
    output_blobs: list,
    cw_l: int = 0,
    cw_r: int = 0,
) -> dict:
    """Do a synchronous matrix inference with optional left/right context windows."""
    matrix_shape = next(iter(data.values())).shape
    result = {}

    for blob_name in output_blobs:
        output_shape = exec_net.outputs[blob_name].shape
        batch_size = output_shape[0]
        result[blob_name] = np.ndarray((matrix_shape[0], np.prod(output_shape[1:])))

    # Walk the input in batch_size steps, starting cw_l rows early and ending
    # cw_r rows late; the read index is clamped to the first/last row so edge
    # frames are repeated.
    for i in range(-cw_l, matrix_shape[0] + cw_r, batch_size):
        if i < 0:
            index = 0
        elif i >= matrix_shape[0]:
            index = matrix_shape[0] - 1
        else:
            index = i

        vectors = {blob_name: data[blob_name][index:index + batch_size] for blob_name in input_blobs}
        num_of_vectors = next(iter(vectors.values())).shape[0]

        if num_of_vectors < batch_size:
            temp = {blob_name: np.zeros((batch_size, vectors[blob_name].shape[1]))
                    for blob_name in input_blobs}
            for blob_name in input_blobs:
                temp[blob_name][:num_of_vectors] = vectors[blob_name]
            vectors = temp

        for blob_name in input_blobs:
            vectors[blob_name] = vectors[blob_name].reshape(exec_net.input_info[blob_name].input_data.shape)

        vector_results = exec_net.infer(vectors)

        # Outputs are written shifted left by cw_r; skip iterations whose
        # output window would start before row 0.
        if i - cw_r < 0:
            continue

        for blob_name in output_blobs:
            vector_result = vector_results[blob_name].reshape((batch_size, result[blob_name].shape[1]))
            result[blob_name][i - cw_r:i - cw_r + batch_size] = vector_result[:num_of_vectors]

    return result
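# Usage sketch for the context-window variant: non-zero cw_l/cw_r repeat the
# edge rows so each output frame sees its full left/right context, as ASR
# front-ends often require. The blob names and the 7/7 window sizes are
# illustrative assumptions.
def _example_infer_with_context(exec_net: ExecutableNetwork, utterance: np.ndarray) -> dict:
    return infer_data({"input": utterance}, exec_net, ["input"], ["output"], cw_l=7, cw_r=7)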
def infer_async_thread_proc(
        exec_net: ExecutableNetwork, first_request_index: int,
        image_list: list, image_filename_list: list,
        display_image_list: list, first_image_index: int,
        last_image_index: int, num_total_inferences: int,
        result_list: list, result_index: int,
        start_barrier: threading.Barrier, end_barrier: threading.Barrier,
        simultaneous_infer_per_thread: int, infer_result_queue: queue.Queue,
        input_blob, output_blob):

    # image_list is a list of numpy.ndarray (preprocessed images)
    # image_filename_list is a list of strings, the filename of the corresponding image in image_list

    # sync with the main start barrier
    start_barrier.wait()

    start_time = time.time()
    end_time = start_time

    handle_list = [None] * simultaneous_infer_per_thread
    image_index = first_image_index
    image_result_start_index = 0

    # do all work to be done by the thread
    for outer_index in range(0, int(num_total_inferences / simultaneous_infer_per_thread)):

        # Start the simultaneous async inferences
        for start_index in range(0, simultaneous_infer_per_thread):
            # Store a tuple of (inference handle, filename, display image)
            handle_list[start_index] = (
                exec_net.start_async(request_id=first_request_index + start_index,
                                     inputs={input_blob: image_list[image_index]}),
                image_filename_list[image_index],
                display_image_list[image_index])
            image_index += 1
            if image_index > last_image_index:
                image_index = first_image_index

        # Wait for the simultaneous async inferences to finish.
        for wait_index in range(0, simultaneous_infer_per_thread):
            infer_stat = handle_list[wait_index][0].wait()
            res = handle_list[wait_index][0].outputs[output_blob]
            top_ind = numpy.argsort(res, axis=1)[0, -1:][::-1]
            top_ind = top_ind[0]
            prob = res[0][top_ind]
            image_filename = handle_list[wait_index][1]
            display_image = handle_list[wait_index][2]

            # put a tuple on the output queue with (filename, top index, probability, and display_image)
            infer_result_queue.put((image_filename, top_ind, prob, display_image), True)

            handle_list[wait_index] = None

        if quit_flag:
            # the quit flag was set from main so break out of the loop
            break

    # save the time spent on inferences within this inference thread and associated reader thread
    end_time = time.time()
    total_inference_time = end_time - start_time
    result_list[result_index] = total_inference_time

    print("thread " + str(result_index) + " end barrier reached")

    # wait for all inference threads to finish
    end_barrier.wait()
class MtCNNFaceDetection(InferenceBase):
    Config = MTCNNFaceDetectionConfig()

    OpenVinoExecutablesP = list()
    OpenVinoExecutableR = ExecutableNetwork()
    OpenVinoExecutableO = ExecutableNetwork()

    OpenVinoNetworkP = IENetwork()
    OpenVinoNetworkR = IENetwork()
    OpenVinoNetworkO = IENetwork()

    Scales = []

    RINPUT = []
    OINPUT = []

    LastFaceDetections = []
    LastLandmarkDetections = []

    InputLayerP = str()
    InputLayerR = str()
    InputLayerO = str()

    OutputLayersP = list()
    OutputLayersR = list()
    OutputLayersO = list()

    InputShapeP = []
    InputShapeR = []
    InputShapeO = []

    def __init__(self, config=MTCNNFaceDetectionConfig()):
        super(MtCNNFaceDetection, self).__init__(config)
        self.Config = config

    def prepare_detector(self):
        """
        Override Base Class since MTCNN works with three different models
        :return: None
        """
        if self.Config.ModelPath is None or self.Config.ModelName is None:
            return None

        logging.log(logging.INFO, "Setting Up R - O Network Input Storage")
        self.RINPUT = np.zeros(dtype=float, shape=(self.Config.RInputBatchSize, 3, 24, 24))
        self.OINPUT = np.zeros(dtype=float, shape=(self.Config.OInputBatchSize, 3, 48, 48))

        self.OpenVinoIE = IECore()

        if self.Config.CpuExtension and 'CPU' in self.Config.TargetDevice:
            logging.log(logging.INFO, "CPU Extensions Added")
            self.OpenVinoIE.add_extension(self.Config.CpuExtensionPath, "CPU")

        try:
            # Model File Paths
            model_file = self.Config.ModelPath + self.Config.PModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.PModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkP = IENetwork(model=model_file, weights=model_weights)
            logging.log(logging.INFO, "Loading P Network")

            model_file = self.Config.ModelPath + self.Config.RModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.RModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkR = IENetwork(model=model_file, weights=model_weights)
            self.OpenVinoNetworkR.batch_size = self.Config.RInputBatchSize
            logging.log(logging.INFO, "Loading R Network")

            model_file = self.Config.ModelPath + self.Config.OModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.OModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkO = IENetwork(model=model_file, weights=model_weights)
            self.OpenVinoNetworkO.batch_size = self.Config.OInputBatchSize
            logging.log(logging.INFO, "Loading O Network")
        except FileNotFoundError as err:
            logging.log(logging.ERROR, "{} {}".format(err.strerror, err.filename))
            exit(-1)

        if "CPU" in self.Config.TargetDevice:
            supported_layers = self.OpenVinoIE.query_network(self.OpenVinoNetworkP, "CPU")
            not_supported_layers = [l for l in self.OpenVinoNetworkP.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                logging.log(logging.INFO,
                            "Following layers are not supported by the plugin for specified device {}:\n {}"
                            .format(self.Config.TargetDevice, ', '.join(not_supported_layers)))
                logging.log(logging.INFO,
                            "Please try to specify cpu extensions library path in config.json file ")

        # Input / Output Memory Allocations to feed input or get output values
        self.InputLayerP = next(iter(self.OpenVinoNetworkP.inputs))
        self.InputLayerR = next(iter(self.OpenVinoNetworkR.inputs))
        self.InputLayerO = next(iter(self.OpenVinoNetworkO.inputs))

        self.OutputLayersP = list(self.OpenVinoNetworkP.outputs)
        self.OutputLayersR = list(self.OpenVinoNetworkR.outputs)
        self.OutputLayersO = list(self.OpenVinoNetworkO.outputs)

        self.InputShapeP = self.OpenVinoNetworkP.inputs[self.InputLayerP].shape
        self.InputShapeR = self.OpenVinoNetworkR.inputs[self.InputLayerR].shape
        self.InputShapeO = self.OpenVinoNetworkO.inputs[self.InputLayerO].shape

        # Enable Dynamic Batch By Default
        config = {"DYN_BATCH_ENABLED": "YES"}

        self.OpenVinoExecutableR = self.OpenVinoIE.load_network(network=self.OpenVinoNetworkR,
                                                                device_name=self.Config.TargetDevice,
                                                                config=config,
                                                                num_requests=self.Config.RequestCount)
        logging.log(logging.INFO, "Created R Network Executable")

        self.OpenVinoExecutableO = self.OpenVinoIE.load_network(network=self.OpenVinoNetworkO,
                                                                device_name=self.Config.TargetDevice,
                                                                config=config,
                                                                num_requests=self.Config.RequestCount)
        logging.log(logging.INFO, "Created O Network Executable")

        # Build the image pyramid: one reshaped P network per scale
        self.Config.MinLength = min(self.Config.InputHeight, self.Config.InputWidth)
        M = self.Config.MinDetectionSize / self.Config.MinimumFaceSize
        self.Config.MinLength *= M

        while self.Config.MinLength > self.Config.MinDetectionSize:
            scale = (M * self.Config.Factor ** self.Config.FactorCount)
            self.Scales.append(scale)
            self.Config.MinLength *= self.Config.Factor
            self.Config.FactorCount += 1

            sw, sh = math.ceil(self.Config.InputWidth * scale), math.ceil(self.Config.InputHeight * scale)

            self.OpenVinoNetworkP.reshape({self.InputLayerP: (1, 3, sh, sw)})

            self.OpenVinoExecutablesP.append(
                self.OpenVinoIE.load_network(network=self.OpenVinoNetworkP,
                                             device_name=self.Config.TargetDevice,
                                             num_requests=self.Config.RequestCount))

        logging.log(logging.INFO, "Created Scaled P Networks {}".format(len(self.OpenVinoExecutablesP)))

    def run_mtcnn_face_detection(self, images, request_id=0):
        """
        Get Detected Face Coordinates
        :param images:
        :param request_id:
        :return:
        """
        self.InferenceCount += 1
        start_time = time.time()

        bounding_boxes = []
        landmarks = []

        cv_img = cv.cvtColor(images, cv.COLOR_BGR2RGB)
        image = Image.fromarray(cv_img)

        none_count = 0

        # Stage 1: run each scaled P network and collect candidate boxes
        for i, scale in enumerate(self.Scales):
            width, height = image.size
            sw, sh = math.ceil(width * scale), math.ceil(height * scale)
            img = image.resize((sw, sh), Image.BILINEAR)
            img = np.asarray(img, 'float32')
            img = self.preprocess(img)

            output = self.OpenVinoExecutablesP[i].infer({self.InputLayerP: img})

            probs = output["prob1"][0, 1, :, :]
            offsets = output["conv4_2"]

            boxes = self.generate_bboxes(probs, offsets, scale, self.Config.PNetworkThreshold)

            if len(boxes) == 0:
                bounding_boxes.append(None)
                none_count += 1
            else:
                keep = self.nms(boxes[:, 0:5], overlap_threshold=0.5)
                bounding_boxes.append(boxes[keep])

        if len(bounding_boxes) > none_count:
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            bounding_boxes = np.vstack(bounding_boxes)

            keep = self.nms(bounding_boxes[:, 0:5], self.Config.NMSThresholds[0])
            bounding_boxes = bounding_boxes[keep]

            bounding_boxes = self.calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            bounding_boxes = self.convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # Stage 2: refine candidates with the R network
            img_boxes = self.get_image_boxes(bounding_boxes, image, size=24)

            if img_boxes.shape[0] > 0:
                shp = img_boxes.shape
                self.RINPUT[0:shp[0], ] = img_boxes
                self.OpenVinoExecutableR.requests[request_id].set_batch(shp[0])
                self.OpenVinoExecutableR.requests[request_id].infer({self.InputLayerR: self.RINPUT})

                offsets = self.OpenVinoExecutableR.requests[request_id].outputs['conv5_2'][:shp[0], ]
                probs = self.OpenVinoExecutableR.requests[request_id].outputs['prob1'][:shp[0]]

                keep = np.where(probs[:, 1] > self.Config.RNetworkThreshold)[0]
                bounding_boxes = bounding_boxes[keep]
                bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
                offsets = offsets[keep]

                keep = self.nms(bounding_boxes, self.Config.NMSThresholds[1])
                bounding_boxes = bounding_boxes[keep]
                bounding_boxes = self.calibrate_box(bounding_boxes, offsets[keep])
                bounding_boxes = self.convert_to_square(bounding_boxes)
                bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

                # Stage 3: final boxes and landmarks from the O network
                img_boxes = self.get_image_boxes(bounding_boxes, image, size=48)
                if img_boxes.shape[0] > 0:
                    shp = img_boxes.shape
                    self.OINPUT[0:shp[0], ] = img_boxes
                    self.OpenVinoExecutableO.requests[request_id].set_batch(shp[0])
                    self.OpenVinoExecutableO.requests[request_id].infer({self.InputLayerO: self.OINPUT})

                    landmarks = self.OpenVinoExecutableO.requests[request_id].outputs['conv6_3'][:shp[0]]
                    offsets = self.OpenVinoExecutableO.requests[request_id].outputs['conv6_2'][:shp[0]]
                    probs = self.OpenVinoExecutableO.requests[request_id].outputs['prob1'][:shp[0]]

                    keep = np.where(probs[:, 1] > self.Config.ONetworkThreshold)[0]
                    bounding_boxes = bounding_boxes[keep]
                    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
                    offsets = offsets[keep]
                    landmarks = landmarks[keep]

                    # compute landmark points
                    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
                    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
                    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
                    landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
                    landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

                    bounding_boxes = self.calibrate_box(bounding_boxes, offsets)
                    keep = self.nms(bounding_boxes, self.Config.NMSThresholds[2], mode='min')
                    bounding_boxes = bounding_boxes[keep]
                    landmarks = landmarks[keep]

        none_count = 0
        face_detections = []
        landmark_detections = []

        i = 0
        for box in bounding_boxes:
            if box is None:
                none_count += 1
            else:
                scale = box[4]
                xmin = float((box[0] / scale) / self.Config.InputWidth)
                ymin = float((box[1] / scale) / self.Config.InputHeight)
                xmax = float((box[2] / scale) / self.Config.InputWidth)
                ymax = float((box[3] / scale) / self.Config.InputHeight)
                face_detections.append([xmin, ymin, xmax, ymax])

                lands = []
                for l in range(5):
                    lands.append(float((landmarks[i][l] / scale) / self.Config.InputWidth))
                    lands.append(float((landmarks[i][l + 5] / scale) / self.Config.InputHeight))
                landmark_detections.append(lands)
            i += 1

        if none_count == len(bounding_boxes):
            return [], []

        self.LastFaceDetections = face_detections
        self.LastLandmarkDetections = landmark_detections

        self.ElapsedInferenceTime += (time.time() - start_time)

    def infer(self, images, request_id=0):
        """
        Run inference
        :param images: image to get faces
        :param request_id: request id
        :return:
        """
        self.run_mtcnn_face_detection(images, request_id=request_id)

    def request_ready(self, request_id):
        """
        This is true by default since there is no ASYNC mode for MTCNN
        :param request_id:
        :return:
        """
        return True

    def get_face_detection_data(self, request_id=0):
        """
        Get Latest Results for Face Coordinates
        :param request_id:
        :return:
        """
        last_detections = self.LastFaceDetections
        self.LastFaceDetections = []
        return last_detections

    def get_face_landmarks_data(self, request_id=0):
        """
        Get Latest Results for Landmark Coordinates
        :param request_id:
        :return:
        """
        last_detections = self.LastLandmarkDetections
        self.LastLandmarkDetections = []
        return last_detections
    @staticmethod
    def preprocess(img):
        """Preprocessing step before feeding the network.

        Arguments:
            img: a float numpy array of shape [h, w, c].

        Returns:
            a float numpy array of shape [1, c, h, w].
        """
        img = img.transpose((2, 0, 1))
        img = np.expand_dims(img, 0)
        img = (img - 127.5) * 0.0078125
        return img

    @staticmethod
    def generate_bboxes(probs, offsets, scale, threshold):
        """Generate bounding boxes at places where there is probably a face.

        Arguments:
            probs: a float numpy array of shape [n, m].
            offsets: a float numpy array of shape [1, 4, n, m].
            scale: a float number, width and height of the image
                were scaled by this number.
            threshold: a float number.

        Returns:
            a float numpy array of shape [n_boxes, 9]
        """
        # applying P-Net is equivalent, in some sense, to
        # moving a 12x12 window with stride 2
        stride = 2
        cell_size = 12

        # indices of boxes where there is probably a face
        inds = np.where(probs > threshold)

        if inds[0].size == 0:
            return np.array([])

        # transformations of bounding boxes
        tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]

        # they are defined as:
        # w = x2 - x1 + 1
        # h = y2 - y1 + 1
        # x1_true = x1 + tx1*w
        # x2_true = x2 + tx2*w
        # y1_true = y1 + ty1*h
        # y2_true = y2 + ty2*h

        offsets = np.array([tx1, ty1, tx2, ty2])
        score = probs[inds[0], inds[1]]

        # P-Net is applied to scaled images,
        # so we need to rescale bounding boxes back
        bounding_boxes = np.vstack([
            np.round((stride * inds[1] + 1.0) / scale),
            np.round((stride * inds[0] + 1.0) / scale),
            np.round((stride * inds[1] + 1.0 + cell_size) / scale),
            np.round((stride * inds[0] + 1.0 + cell_size) / scale),
            score,
            offsets
        ])
        # why one is added?

        return bounding_boxes.T

    @staticmethod
    def nms(boxes, overlap_threshold=0.5, mode='union'):
        """Non-maximum suppression.

        Arguments:
            boxes: a float numpy array of shape [n, 5],
                where each row is (xmin, ymin, xmax, ymax, score).
            overlap_threshold: a float number.
            mode: 'union' or 'min'.

        Returns:
            list with indices of the selected boxes
        """
        # if there are no boxes, return the empty list
        if len(boxes) == 0:
            return []

        # list of picked indices
        pick = []

        # grab the coordinates of the bounding boxes
        x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

        area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
        ids = np.argsort(score)  # in increasing order

        while len(ids) > 0:
            # grab index of the largest value
            last = len(ids) - 1
            i = ids[last]
            pick.append(i)

            # compute intersections of the box with the largest score
            # with the rest of the boxes

            # left top corner of intersection boxes
            ix1 = np.maximum(x1[i], x1[ids[:last]])
            iy1 = np.maximum(y1[i], y1[ids[:last]])

            # right bottom corner of intersection boxes
            ix2 = np.minimum(x2[i], x2[ids[:last]])
            iy2 = np.minimum(y2[i], y2[ids[:last]])

            # width and height of intersection boxes
            w = np.maximum(0.0, ix2 - ix1 + 1.0)
            h = np.maximum(0.0, iy2 - iy1 + 1.0)

            # intersections' areas
            inter = w * h
            if mode == 'min':
                overlap = inter / np.minimum(area[i], area[ids[:last]])
            elif mode == 'union':
                # intersection over union (IoU)
                overlap = inter / (area[i] + area[ids[:last]] - inter)

            # delete all boxes where overlap is too big
            ids = np.delete(ids, np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]))

        return pick

    @staticmethod
    def calibrate_box(bboxes, offsets):
        """Transform bounding boxes to be more like true bounding boxes.
        'offsets' is one of the outputs of the nets.

        Arguments:
            bboxes: a float numpy array of shape [n, 5].
            offsets: a float numpy array of shape [n, 4].

        Returns:
            a float numpy array of shape [n, 5].
        """
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        w = x2 - x1 + 1.0
        h = y2 - y1 + 1.0
        w = np.expand_dims(w, 1)
        h = np.expand_dims(h, 1)

        # this is what is happening here:
        # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
        # x1_true = x1 + tx1*w
        # y1_true = y1 + ty1*h
        # x2_true = x2 + tx2*w
        # y2_true = y2 + ty2*h
        # below is just a more compact form of this

        # are offsets always such that
        # x1 < x2 and y1 < y2 ?

        translation = np.hstack([w, h, w, h]) * offsets
        bboxes[:, 0:4] = bboxes[:, 0:4] + translation
        return bboxes

    @staticmethod
    def convert_to_square(bboxes):
        """Convert bounding boxes to a square form.

        Arguments:
            bboxes: a float numpy array of shape [n, 5].

        Returns:
            a float numpy array of shape [n, 5], squared bounding boxes.
        """
        square_bboxes = np.zeros_like(bboxes)
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        h = y2 - y1 + 1.0
        w = x2 - x1 + 1.0
        max_side = np.maximum(h, w)
        square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
        square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
        square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
        square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
        return square_bboxes

    @staticmethod
    def correct_bboxes(bboxes, width, height):
        """Crop boxes that are too big and get coordinates with respect to cutouts.

        Arguments:
            bboxes: a float numpy array of shape [n, 5],
                where each row is (xmin, ymin, xmax, ymax, score).
            width: a float number.
            height: a float number.

        Returns:
            dy, dx, edy, edx: int numpy arrays of shape [n],
                coordinates of the boxes with respect to the cutouts.
            y, x, ey, ex: int numpy arrays of shape [n],
                corrected ymin, xmin, ymax, xmax.
            h, w: int numpy arrays of shape [n],
                just heights and widths of boxes.

            in the following order:
                [dy, edy, dx, edx, y, ey, x, ex, w, h].
        """
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
        num_boxes = bboxes.shape[0]

        # 'e' stands for end
        # (x, y) -> (ex, ey)
        x, y, ex, ey = x1, y1, x2, y2

        # we need to cut out a box from the image.
        # (x, y, ex, ey) are corrected coordinates of the box in the image.
        # (dx, dy, edx, edy) are coordinates of the box in the cutout from the image.
        dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, ))
        edx, edy = w.copy() - 1.0, h.copy() - 1.0

        # if box's bottom right corner is too far right
        ind = np.where(ex > width - 1.0)[0]
        edx[ind] = w[ind] + width - 2.0 - ex[ind]
        ex[ind] = width - 1.0

        # if box's bottom right corner is too low
        ind = np.where(ey > height - 1.0)[0]
        edy[ind] = h[ind] + height - 2.0 - ey[ind]
        ey[ind] = height - 1.0

        # if box's top left corner is too far left
        ind = np.where(x < 0.0)[0]
        dx[ind] = 0.0 - x[ind]
        x[ind] = 0.0

        # if box's top left corner is too high
        ind = np.where(y < 0.0)[0]
        dy[ind] = 0.0 - y[ind]
        y[ind] = 0.0

        return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
        return_list = [i.astype('int32') for i in return_list]
        return return_list

    @staticmethod
    def get_image_boxes(bounding_boxes, img, size=24):
        """Cut out boxes from the image.

        Arguments:
            bounding_boxes: a float numpy array of shape [n, 5].
            img: an instance of PIL.Image.
            size: an integer, size of cutouts.

        Returns:
            a float numpy array of shape [n, 3, size, size].
        """
        num_boxes = len(bounding_boxes)
        width, height = img.size

        [dy, edy, dx, edx, y, ey, x, ex, w, h] = MtCNNFaceDetection.correct_bboxes(bounding_boxes, width, height)
        img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

        for i in range(num_boxes):
            if h[i] <= 0 or w[i] <= 0:
                continue
            img_box = np.zeros((h[i], w[i], 3), 'uint8')
            img_array = np.asarray(img, 'uint8')
            img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
                img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

            # resize
            img_box = Image.fromarray(img_box)
            img_box = img_box.resize((size, size), Image.BILINEAR)
            img_box = np.asarray(img_box, 'float32')

            img_boxes[i, :, :, :] = MtCNNFaceDetection.preprocess(img_box)

        return img_boxes
class InferenceBase(object):
    """
    Base Class to Load a Model with Inference Engine
    """

    Config = InferenceConfig()

    '''Inference Engine Components'''
    OpenVinoIE = IECore()
    OpenVinoNetwork = IENetwork()
    OpenVinoExecutable = ExecutableNetwork()

    '''Model Components'''
    InputLayer = str()
    InputLayers = list()
    OutputLayer = str()
    OutputLayers = list()
    InputShape = None
    OutputShape = None

    '''Performance Metrics Storage'''
    ElapsedInferenceTime = 0.0
    InferenceCount = 0.0

    def __init__(self, infer_config):
        self.Config = infer_config
        self.prepare_detector()

    def prepare_detector(self):
        """
        Load Model, Libraries According to Given Configuration.
        :return:
        """
        if self.Config.ModelPath is None or self.Config.ModelName is None:
            return None

        ''' Model File Paths '''
        model_file = self.Config.ModelPath + self.Config.ModelName + '.xml'
        model_weights = self.Config.ModelPath + self.Config.ModelName + '.bin'
        logging.log(logging.INFO, "Model File {}".format(model_file))
        logging.log(logging.INFO, "Model Weights {}".format(model_weights))

        ''' Create IECore Object '''
        self.OpenVinoIE = IECore()

        ''' If target device is CPU add extensions '''
        if self.Config.CpuExtension and 'CPU' in self.Config.TargetDevice:
            logging.log(logging.INFO,
                        "Adding CPU Extensions, Path {}".format(self.Config.CpuExtensionPath))
            self.OpenVinoIE.add_extension(self.Config.CpuExtensionPath, "CPU")

        ''' Try loading network '''
        try:
            self.OpenVinoNetwork = IENetwork(model=model_file, weights=model_weights)
            logging.log(logging.INFO, "Loaded IENetwork")
        except FileNotFoundError as err:
            logging.log(logging.ERROR, err.strerror + " " + err.filename)
            logging.log(logging.ERROR, "Exiting ....")
            exit(-1)

        ''' Print supported/not-supported layers '''
        if "CPU" in self.Config.TargetDevice:
            supported_layers = self.OpenVinoIE.query_network(self.OpenVinoNetwork, "CPU")
            not_supported_layers = [l for l in self.OpenVinoNetwork.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                logging.log(logging.WARN,
                            "Following layers are not supported by the plugin for specified device {}:\n {}"
                            .format(self.Config.TargetDevice, ', '.join(not_supported_layers)))
                logging.log(logging.WARN,
                            "Please try to specify cpu extensions library path in config.json file ")

        '''Input / Output Memory Allocations to feed input or get output values'''
        self.InputLayer = next(iter(self.OpenVinoNetwork.inputs))
        logging.log(logging.INFO, "Input Layer {}".format(self.InputLayer))

        N, C, H, W = self.OpenVinoNetwork.inputs[self.InputLayer].shape
        if self.Config.BatchSize > N:
            self.OpenVinoNetwork.batch_size = self.Config.BatchSize
        else:
            self.Config.BatchSize = self.OpenVinoNetwork.batch_size

        self.OutputLayer = next(iter(self.OpenVinoNetwork.outputs))
        logging.log(logging.INFO, "Output Layer {}".format(self.OutputLayer))

        self.InputLayers = list(self.OpenVinoNetwork.inputs)
        logging.log(logging.INFO, "Input Layers {}".format(self.InputLayers))

        self.OutputLayers = list(self.OpenVinoNetwork.outputs)
        logging.log(logging.INFO, "Output Layers {}".format(self.OutputLayers))

        self.InputShape = self.OpenVinoNetwork.inputs[self.InputLayer].shape
        logging.log(logging.INFO, "Input Shape: {}".format(self.InputShape))

        self.OutputShape = self.OpenVinoNetwork.outputs[self.OutputLayer].shape
        logging.log(logging.INFO, "Output Shape: {}".format(self.OutputShape))

        '''Set Configurations'''
        config = {}
        if self.Config.DynamicBatch:
            # The Inference Engine config key is "DYN_BATCH_ENABLED"
            config["DYN_BATCH_ENABLED"] = "YES"
            logging.log(logging.INFO, "Enabling Dynamic Batch Mode")

        if self.Config.Async:
            logging.log(logging.INFO, "Async Mode Enabled")

        self.OpenVinoExecutable = self.OpenVinoIE.load_network(network=self.OpenVinoNetwork,
                                                               device_name=self.Config.TargetDevice,
                                                               config=config,
                                                               num_requests=self.Config.RequestCount)
        logging.log(logging.INFO, "Completed Loading Neural Network")
        return None

    def preprocess_input(self, input_data):
        """
        Pre-process Input According to Loaded Network
        :param input_data:
        :return:
        """
        n, c, h, w = self.OpenVinoNetwork.inputs[self.InputLayer].shape
        logging.log(logging.DEBUG,
                    "Pre-processing Input to Shape {}".format(self.OpenVinoNetwork.inputs[self.InputLayer].shape))
        resized = cv.resize(input_data, (w, h))
        color_converted = cv.cvtColor(resized, cv.COLOR_BGR2RGB)
        transposed = np.transpose(color_converted, (2, 0, 1))
        reshaped = np.expand_dims(transposed, axis=0)
        return reshaped

    def infer(self, input_data, request_id=0):
        """
        Used to send data to network and start forward propagation.
        :param input_data:
        :param request_id:
        :return:
        """
        if self.Config.Async:
            logging.log(logging.DEBUG, "Async Infer Request Id {}".format(request_id))
            self.infer_async(input_data, request_id)
        else:
            logging.log(logging.DEBUG, "Infer Request Id {}".format(request_id))
            self.infer_sync(input_data, request_id)

    def infer_async(self, input_data, request_id=0):
        """
        Start Async Infer for Given Request Id
        :param input_data:
        :param request_id:
        :return:
        """
        self.InferenceCount += 1
        processed_input = self.preprocess_input(input_data)
        self.OpenVinoExecutable.requests[request_id].async_infer(
            inputs={self.InputLayer: processed_input})

    def infer_sync(self, input_data, request_id=0):
        """
        Start Sync Infer
        :param input_data:
        :param request_id:
        :return:
        """
        self.InferenceCount += 1
        processed_input = self.preprocess_input(input_data)
        start = time.time()
        self.OpenVinoExecutable.requests[request_id].infer(
            inputs={self.InputLayer: processed_input})
        end = time.time()
        self.ElapsedInferenceTime += (end - start)

    def request_ready(self, request_id):
        """
        Check if request is ready
        :param request_id: id to check request
        :return: bool
        """
        if self.Config.Async:
            if self.OpenVinoExecutable.requests[request_id].wait(0) == 0:
                return True
        else:
            return True
        return False

    def get_results(self, output_layer, request_id=0):
        """
        Get results from the network.
        :param output_layer: output layer
        :param request_id: request id
        :return:
        """
        logging.log(logging.DEBUG, "Getting Results Request Id {}".format(request_id))
        return self.OpenVinoExecutable.requests[request_id].outputs[output_layer]

    def print_inference_performance_metrics(self):
        """
        Print Performance Data Collection
        :return:
        """
        # Guard against division by zero when nothing has been inferred yet
        if self.InferenceCount == 0:
            logging.log(logging.WARN, "No inferences performed yet, no metrics to report")
            return

        if self.Config.Async:
            logging.log(logging.WARN,
                        'Async Mode Inferred Frame Count {}'.format(self.InferenceCount))
        else:
            logging.log(logging.WARN,
                        "Sync Mode Inferred Frame Count {}".format(self.InferenceCount))
            logging.log(logging.WARN,
                        "Inference Per Input: {} MilliSeconds".format(
                            (self.ElapsedInferenceTime / self.InferenceCount) * 1000))
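# Minimal usage sketch for InferenceBase in sync mode. The config attribute
# values below are illustrative assumptions; InferenceConfig must expose the
# fields referenced in prepare_detector above.
def _example_inference_base(frame):
    config = InferenceConfig()
    config.ModelPath = "models/"      # hypothetical path
    config.ModelName = "some-model"   # hypothetical model name
    config.TargetDevice = "CPU"
    config.Async = False
    detector = InferenceBase(config)  # loads the network in __init__
    detector.infer(frame)
    detector.print_inference_performance_metrics()
    return detector.get_results(detector.OutputLayer)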
def infer_async_thread_proc(
        exec_net: ExecutableNetwork, first_request_index: int,
        image_list: list, image_filename_list: list,
        display_image_list: list, first_image_index: int,
        last_image_index: int, num_total_inferences: int,
        result_list: list, result_index: int,
        start_barrier: threading.Barrier, end_barrier: threading.Barrier,
        simultaneous_infer_per_thread: int, infer_result_queue: queue.Queue):

    # image_list is a list of numpy.ndarray (preprocessed images)
    # image_filename_list is a list of strings, the filename of the corresponding image in image_list
    input_blob = 'data'
    out_blob = 'prob'

    while True:
        # sync with the main start barrier.
        # Usually we'll wait on simultaneous_infer_per_thread requests,
        # but the last batch could be smaller.
        images_to_wait_on = simultaneous_infer_per_thread
        start_barrier.wait()

        start_time = time.time()
        end_time = start_time

        handle_list = [None] * simultaneous_infer_per_thread
        image_index = first_image_index

        while image_index <= last_image_index:

            # Start the simultaneous async inferences
            for start_index in range(0, simultaneous_infer_per_thread):
                # Store a tuple of (inference handle, filename, display image, image index)
                handle_list[start_index] = (
                    exec_net.start_async(request_id=first_request_index + start_index,
                                         inputs={input_blob: image_list[image_index]}),
                    image_filename_list[image_index],
                    display_image_list[image_index],
                    image_index)
                image_index += 1
                if image_index > last_image_index:
                    # done with all our images
                    images_to_wait_on = start_index + 1
                    break

            # Wait for the simultaneous async inferences to finish.
            for wait_index in range(0, images_to_wait_on):
                infer_stat = handle_list[wait_index][0].wait()
                res = handle_list[wait_index][0].outputs[out_blob]
                top_ind = numpy.argsort(res, axis=1)[0, -1:][::-1]
                top_ind = top_ind[0]
                prob = res[0][top_ind]
                image_filename = handle_list[wait_index][1]
                display_image = handle_list[wait_index][2]
                display_image_index = handle_list[wait_index][3]

                # put a tuple on the output queue with (filename, top index, probability, display_image, and display_image_index)
                infer_result_queue.put((image_filename, top_ind, prob, display_image, display_image_index), True)

                handle_list[wait_index] = None

            if quit_flag:
                # the quit flag was set from main so break out of the loop
                break

        # save the time spent on inferences within this inference thread and associated reader thread
        end_time = time.time()
        total_inference_time = end_time - start_time
        result_list[result_index] = total_inference_time

        print("thread " + str(result_index) + " end barrier reached")

        # wait for all inference threads to finish
        end_barrier.wait()

        if not reset_flag:
            print("thread " + str(result_index) + " Not resetting, breaking")
            break

        print("thread " + str(result_index) + " looping back")
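# Minimal consumer sketch for infer_result_queue: drain one classification
# result per expected inference. The print format is an illustrative
# assumption; the original main loop is not shown in this section.
def _example_drain_results(infer_result_queue: queue.Queue, num_expected: int):
    for _ in range(num_expected):
        image_filename, top_ind, prob, display_image, display_image_index = infer_result_queue.get(True)
        print("{} -> class {} (prob {:.3f})".format(image_filename, top_ind, float(prob)))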