def predictions_to_object(predictions, raw_img, ratio, nms_thr, score_thr):
    boxes = predictions[:, :4]
    scores = predictions[:, 4:5] * predictions[:, 5:]

    # convert (cx, cy, w, h) to (x1, y1, x2, y2)
    boxes_xyxy = np.ones_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
    boxes_xyxy /= ratio

    dets = multiclass_nms(boxes_xyxy, scores, nms_thr, score_thr)

    detect_object = []
    if dets is not None:
        img_size_h, img_size_w = raw_img.shape[:2]
        final_boxes, final_scores, final_cls_inds = \
            dets[:, :4], dets[:, 4], dets[:, 5]
        for i, box in enumerate(final_boxes):
            x1, y1, x2, y2 = box
            c = int(final_cls_inds[i])
            r = ailia.DetectorObject(
                category=c,
                prob=final_scores[i],
                x=x1 / img_size_w,
                y=y1 / img_size_h,
                w=(x2 - x1) / img_size_w,
                h=(y2 - y1) / img_size_h,
            )
            detect_object.append(r)

    return detect_object

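# The multiclass_nms helper called above is assumed to be a local
# YOLOX-style utility. A minimal self-contained sketch (illustrative names,
# not necessarily the repo's implementation): per-class score filtering
# followed by greedy IoU suppression, returning rows of
# [x1, y1, x2, y2, score, cls], or None when nothing survives.
import numpy as np

def nms_sketch(boxes, scores, nms_thr):
    """Greedy NMS over xyxy boxes; returns kept indices, best score first."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= nms_thr]  # drop boxes overlapping too much
    return keep

def multiclass_nms_sketch(boxes, scores, nms_thr, score_thr):
    final_dets = []
    for cls_ind in range(scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        valid = cls_scores > score_thr
        if not valid.any():
            continue
        keep = nms_sketch(boxes[valid], cls_scores[valid], nms_thr)
        if keep:
            dets = np.concatenate([
                boxes[valid][keep],
                cls_scores[valid][keep][:, None],
                np.full((len(keep), 1), cls_ind, dtype=boxes.dtype),
            ], axis=1)
            final_dets.append(dets)
    return np.concatenate(final_dets, axis=0) if final_dets else None
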
def compute_blazeface(detector, frame):
    BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
    BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(img, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    org_detections = []
    blaze_face_detections = postprocess(preds_ailia)
    for idx in range(len(blaze_face_detections)):
        obj = blaze_face_detections[idx]
        if len(obj) == 0:
            continue
        d = obj[0]
        obj = ailia.DetectorObject(
            category=0,
            prob=1.0,
            x=d[1],
            y=d[0],
            w=d[3] - d[1],
            h=d[2] - d[0],
        )
        org_detections.append(obj)
    return org_detections

def post_processing(img_shape, all_boxes, all_scores, indices):
    # each row of indices is (batch_index, class_index, box_index)
    indices = indices.astype(int)  # np.int was removed from recent NumPy
    bboxes = []
    for idx_ in indices[0]:
        cls_ind = idx_[1]
        score = all_scores[tuple(idx_)]

        idx_1 = (idx_[0], idx_[2])
        box = all_boxes[idx_1]

        y, x, y2, x2 = box
        w = (x2 - x) / img_shape[1]
        h = (y2 - y) / img_shape[0]
        x /= img_shape[1]
        y /= img_shape[0]

        r = ailia.DetectorObject(
            category=cls_ind,
            prob=score,
            x=x,
            y=y,
            w=w,
            h=h,
        )
        bboxes.append(r)
    return bboxes

def recognize_from_image():
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.debug(f'input image: {image_path}')
        raw_img = cv2.imread(image_path)
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0
        logger.debug(f'input image shape: {raw_img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                pred = detector.predict(img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            pred = detector.predict(img)

        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], raw_img.shape).round()

                img_size_h, img_size_w = raw_img.shape[:2]
                output = []

                # Write results
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

                detect_object = reverse_letterbox(
                    output, raw_img, (raw_img.shape[0], raw_img.shape[1]))
                res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')

def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame

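# The softmax used above is assumed to be scipy.special.softmax or an
# equivalent local helper; a minimal numerically stable NumPy sketch:
import numpy as np

def softmax(x, axis=None):
    x = x - np.max(x, axis=axis, keepdims=True)  # subtract max to avoid overflow
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)
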
def convert_to_ailia_detector_object(bboxes, scores, cls_inds, w, h):
    detector_object = []
    for i, box in enumerate(bboxes):
        cls_indx = int(cls_inds[i])
        r = ailia.DetectorObject(
            category=cls_indx,
            prob=scores[i],
            x=box[0] / w,
            y=box[1] / h,
            w=(box[2] - box[0]) / w,
            h=(box[3] - box[1]) / h,
        )
        detector_object.append(r)
    return detector_object

def compute_blazeface_with_keypoint(detector, frame, anchor_path='anchors.npy'):
    BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
    BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(img, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    detections = []
    keypoints = []
    blaze_face_detections = postprocess(preds_ailia, anchor_path)
    for idx in range(len(blaze_face_detections)):
        obj = blaze_face_detections[idx]
        if len(obj) == 0:
            continue
        d = obj[0]

        # face position
        obj = ailia.DetectorObject(
            category=0,
            prob=1.0,
            x=d[1],
            y=d[0],
            w=d[3] - d[1],
            h=d[2] - d[0],
        )
        detections.append(obj)

        # keypoint position (left and right eye)
        keypoint = {
            "eye_left_x": d[4], "eye_left_y": d[5],
            "eye_right_x": d[6], "eye_right_y": d[7],
        }
        keypoints.append(keypoint)

    return detections, keypoints

def convert_to_ailia_detector_object(preds, w, h):
    i = 0
    detector_object = []
    for j in range(len(preds[i]['rois'])):
        (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)  # np.int was removed from recent NumPy
        obj = preds[i]['class_ids'][j]
        score = float(preds[i]['scores'][j])

        r = ailia.DetectorObject(
            category=obj,
            prob=score,
            x=x1 / w,
            y=y1 / h,
            w=(x2 - x1) / w,
            h=(y2 - y1) / h,
        )
        detector_object.append(r)

    return detector_object

def detect(self, img):
    raw_shape = img.shape
    img_input, ResizeM = self.preprocess(img)
    scores, raw_boxes = self.infer_image(img_input)
    bboxs, labels, confs = self.postprocess(scores, raw_boxes, ResizeM, raw_shape)

    img_size_h, img_size_w = img.shape[:2]

    output = []
    for i, box in enumerate(bboxs):
        x1, y1, x2, y2 = box
        c = int(labels[i])
        r = ailia.DetectorObject(
            category=c,
            prob=confs[i],
            x=x1 / img_size_w,
            y=y1 / img_size_h,
            w=(x2 - x1) / img_size_w,
            h=(y2 - y1) / img_size_h,
        )
        output.append(r)
    return output

def reverse_letterbox(detections, img, det_shape):
    h, w = img.shape[0], img.shape[1]

    # compute the letterbox padding (in original-image pixels)
    pad_x = pad_y = 0
    if det_shape is not None:
        scale = np.max((h / det_shape[0], w / det_shape[1]))
        start = (det_shape[0:2] - np.array(img.shape[0:2]) / scale) // 2
        pad_x = start[1] * scale
        pad_y = start[0] * scale

    new_detections = []
    for detection in detections:
        logger.debug(detection)
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=(detection.x * (w + pad_x * 2) - pad_x) / w,
            y=(detection.y * (h + pad_y * 2) - pad_y) / h,
            w=(detection.w * (w + pad_x * 2)) / w,
            h=(detection.h * (h + pad_y * 2)) / h,
        )
        new_detections.append(r)
    return new_detections

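# Worked example (illustrative numbers) of the padding math above: a
# 1280x720 frame letterboxed into a 128x128 detector input is scaled by
# 1/10 and padded 28 px top and bottom in detector space, i.e. 280 px at
# frame scale, so normalized coordinates shift and rescale as follows.
h, w = 720, 1280
det_shape = (128, 128)
scale = max(h / det_shape[0], w / det_shape[1])    # 10.0
pad_y = ((det_shape[0] - h / scale) // 2) * scale  # 280.0
pad_x = ((det_shape[1] - w / scale) // 2) * scale  # 0.0
# a detection at the vertical center of the padded square stays centered:
y_frame = (0.5 * (h + pad_y * 2) - pad_y) / h      # 0.5
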
def post_processing(data, boxes, labels, masks):
    bbox_list = [boxes[labels == i, :] for i in range(len(CATEGORY))]
    mask_list = [masks[labels == i, :] for i in range(len(CATEGORY))]

    # remove duplicates: keep only the highest-scoring box per category
    new_bbox_list = []
    new_mask_list = []
    for idx, (bbox, mask) in enumerate(zip(bbox_list, mask_list)):
        if len(bbox) < 1:
            new_bbox_list.append(None)
            new_mask_list.append(None)
            continue
        i = np.argmax(bbox[:, -1])
        new_bbox_list.append(bbox[i, :])
        new_mask_list.append(mask[i, :])
    bbox_list = new_bbox_list
    mask_list = new_mask_list

    ori_shape = data['ori_shape'][:2]
    img_shape = data['img_shape'][:2]
    scale_factor = data['scale_factor']

    ret_boxes = []
    segm_masks = []
    for cls_ind, (box, mask) in enumerate(zip(bbox_list, mask_list)):
        if box is None:
            continue
        score = box[-1]
        x, y, x2, y2 = box[:4]
        if score < args.threshold:
            continue
        w = x2 - x
        h = y2 - y
        ori_x = int(x / scale_factor[1])
        ori_y = int(y / scale_factor[0])
        ori_x2 = int(x2 / scale_factor[1])
        ori_y2 = int(y2 / scale_factor[0])
        ori_w = int(w / scale_factor[1])
        ori_h = int(h / scale_factor[0])

        # segment mask: paste the resized mask into the original image frame
        mask = cv2.resize(mask, (ori_w, ori_h), interpolation=cv2.INTER_LINEAR)
        segm_mask = np.zeros((max(ori_shape[0], ori_y2), max(ori_shape[1], ori_x2)))
        segm_mask[ori_y:ori_y + ori_h, ori_x:ori_x + ori_w] = mask
        segm_mask = segm_mask[:ori_shape[0], :ori_shape[1]]
        segm_mask = (segm_mask > RCNN_MASK_THRE).astype(np.uint8)

        # bbox (normalized to the network input size)
        w = w / img_shape[1]
        h = h / img_shape[0]
        x = x / img_shape[1]
        y = y / img_shape[0]
        r = ailia.DetectorObject(
            category=cls_ind,
            prob=score,
            x=x,
            y=y,
            w=w,
            h=h,
        )
        ret_boxes.append(r)
        segm_masks.append(segm_mask)

    return ret_boxes, segm_masks

def post_processing(img, conf_thresh, nms_thresh, output):
    # [batch, num, 1, 4]
    box_array = output[0]
    # [batch, num, num_classes]
    confs = output[1]

    t1 = time.time()

    if type(box_array).__name__ != 'ndarray':
        # torch tensors -> numpy
        box_array = box_array.cpu().detach().numpy()
        confs = confs.cpu().detach().numpy()

    num_classes = confs.shape[2]

    # [batch, num, 4]
    box_array = box_array[:, :, 0]

    # [batch, num, num_classes] --> [batch, num]
    max_conf = np.max(confs, axis=2)
    max_id = np.argmax(confs, axis=2)

    t2 = time.time()

    bboxes_batch = []
    for i in range(box_array.shape[0]):
        argwhere = max_conf[i] > conf_thresh
        l_box_array = box_array[i, argwhere, :]
        l_max_conf = max_conf[i, argwhere]
        l_max_id = max_id[i, argwhere]

        bboxes = []
        # nms for each class
        for j in range(num_classes):
            cls_argwhere = l_max_id == j
            ll_box_array = l_box_array[cls_argwhere, :]
            ll_max_conf = l_max_conf[cls_argwhere]
            ll_max_id = l_max_id[cls_argwhere]

            keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
            if keep.size > 0:
                ll_box_array = ll_box_array[keep, :]
                ll_max_conf = ll_max_conf[keep]
                ll_max_id = ll_max_id[keep]

                for k in range(ll_box_array.shape[0]):
                    r = ailia.DetectorObject(
                        category=ll_max_id[k],
                        prob=ll_max_conf[k],
                        x=ll_box_array[k, 0],
                        y=ll_box_array[k, 1],
                        w=ll_box_array[k, 2] - ll_box_array[k, 0],
                        h=ll_box_array[k, 3] - ll_box_array[k, 1],
                    )
                    bboxes.append(r)

        bboxes_batch.append(bboxes)

    t3 = time.time()

    print('-----------------------------------')
    print('       max and argmax : %f' % (t2 - t1))
    print('                  nms : %f' % (t3 - t2))
    print('Post processing total : %f' % (t3 - t1))
    print('-----------------------------------')

    return bboxes_batch

def recognize_from_video():
    # net initialize
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning('currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = f_h, f_w
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        raw_img = frame
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0

        pred = detector.predict(img)
        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], raw_img.shape).round()

                img_size_h, img_size_w = raw_img.shape[:2]
                output = []

                # Write results
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

                detect_object = reverse_letterbox(
                    output, raw_img, (raw_img.shape[0], raw_img.shape[1]))
                res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)

        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')

def post_processing(img, conf_thres, nms_thres, outputs):
    batch_detections = []

    img_size_w = img.shape[3]
    img_size_h = img.shape[2]

    batch_size = 1
    num_classes = 80

    anchors = [[10, 13, 16, 30, 33, 23],
               [30, 61, 62, 45, 59, 119],
               [116, 90, 156, 198, 373, 326]]

    boxs = []
    a = np.array(anchors).reshape(3, -1, 2)
    anchor_grid = a.copy().reshape(3, 1, -1, 1, 1, 2)

    # onnx output
    # (1, 3, 80, 80, 85)  # anchor 0
    # (1, 3, 40, 40, 85)  # anchor 1
    # (1, 3, 20, 20, 85)  # anchor 2
    # [cx, cy, w, h, conf, pred_cls(80)]
    for index, out in enumerate(outputs):
        batch = out.shape[1]
        feature_h = out.shape[2]
        feature_w = out.shape[3]

        # stride between the feature map and the network input
        stride_w = int(img_size_w / feature_w)
        stride_h = int(img_size_h / feature_h)

        grid_x, grid_y = np.meshgrid(np.arange(feature_w), np.arange(feature_h))

        # cx, cy, w, h
        pred_boxes = np.zeros(out[..., :4].shape)
        pred_boxes[..., 0] = (sigmoid(out[..., 0]) * 2.0 - 0.5 + grid_x) * stride_w  # cx
        pred_boxes[..., 1] = (sigmoid(out[..., 1]) * 2.0 - 0.5 + grid_y) * stride_h  # cy
        pred_boxes[..., 2:4] = (sigmoid(out[..., 2:4]) * 2) ** 2 * anchor_grid[index]  # wh

        conf = sigmoid(out[..., 4])
        pred_cls = sigmoid(out[..., 5:])

        output = np.concatenate(
            (pred_boxes.reshape(batch_size, -1, 4),
             conf.reshape(batch_size, -1, 1),
             pred_cls.reshape(batch_size, -1, num_classes)),
            -1)
        boxs.append(output)

    outputx = np.concatenate(boxs, 1)

    # NMS
    batch_detections = non_max_suppression(
        outputx, num_classes, conf_thres=conf_thres, nms_thres=nms_thres)

    # output ailia format
    detections = batch_detections[0]
    if detections is None:
        return [[]]
    labels = detections[..., -1]
    boxs = detections[..., :4]
    confs = detections[..., 4]

    bboxes = []
    bboxes_batch = []
    for i, box in enumerate(boxs):
        x1, y1, x2, y2 = box
        c = int(labels[i])
        r = ailia.DetectorObject(
            category=c,
            prob=confs[i],
            x=x1 / img_size_w,
            y=y1 / img_size_h,
            w=(x2 - x1) / img_size_w,
            h=(y2 - y1) / img_size_h,
        )
        bboxes.append(r)
    bboxes_batch.append(bboxes)

    return bboxes_batch

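# The sigmoid used above is assumed to be a local helper; the YOLOv5-style
# decode ((sigmoid(x) * 2 - 0.5 + grid) * stride and
# (sigmoid(wh) * 2) ** 2 * anchor) only needs the plain logistic function:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))
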
def recognize_from_image(detector, dst_path, src_dir, file_):
    # prepare input data
    img = cv2.imread(src_dir + "/" + file_)
    h, w = img.shape[0], img.shape[1]

    if args.arch == "yolov3":
        img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, YOLOV3_THRESHOLD, YOLOV3_IOU)
        count = detector.get_object_count()
    else:
        # preprocessing for blazeface
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(
            image, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
        image = image.transpose((2, 0, 1))  # channel first
        image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
        input_data = image / 127.5 - 1.0

        # inference
        preds_ailia = detector.predict([input_data])

        # postprocessing
        detections = postprocess(preds_ailia)
        count = len(detections)

    written = False
    for idx in range(count):
        if args.arch == "yolov3":
            # get detected face
            obj = detector.get_object(idx)
            margin = 1.0
        else:
            # get detected face
            obj = detections[idx]
            d = obj[0]
            obj = ailia.DetectorObject(
                category=0, prob=1.0,
                x=d[1], y=d[0],
                w=d[3] - d[1], h=d[2] - d[0])
            margin = 1.4

        cx = (obj.x + obj.w / 2) * w
        cy = (obj.y + obj.h / 2) * h
        cw = max(obj.w * w * margin, obj.h * h * margin)
        fx = max(cx - cw / 2, 0)
        fy = max(cy - cw / 2, 0)
        fw = min(cw, w - fx)
        fh = min(cw, h - fy)
        top_left = (int(fx), int(fy))
        bottom_right = (int(fx + fw), int(fy + fh))
        print("face detected " + str(top_left) + "-" + str(bottom_right))

        # crop detected face
        crop_img = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], 0:3]
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue
        cv2.imwrite(dst_path, crop_img)
        written = True

    if not written:
        print("face not found")

def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate emotion
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        emotion = predict(net, crop_img)
        idx = np.argmax(emotion)
        emotion = emotion_table[idx]

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]  # horizontal extent (was computed from y coordinates)
        LABEL_HEIGHT = 20
        color = (255, 128, 128)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            emotion,
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )

def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate age and gender
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = cv2.resize(crop_img, (IMAGE_SIZE, IMAGE_SIZE))
        img = np.expand_dims(img, axis=0)  # add batch dimension

        # inference
        output = net.predict([img])
        prob, age_conv3 = output
        prob = prob[0][0][0]
        age_conv3 = age_conv3[0][0][0][0]

        i = np.argmax(prob)
        gender = 'Female' if i == 0 else 'Male'
        age = round(age_conv3 * 100)

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]  # horizontal extent (was computed from y coordinates)
        LABEL_HEIGHT = 20
        if gender == "Male":
            color = (255, 128, 128)
        else:
            color = (128, 128, 255)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            "{} {}".format(gender, age),
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )

def compute_blazeface_with_keypoint(
        detector, frame, anchor_path='anchors.npy',
        back=False, min_score_thresh=DEFAULT_MIN_SCORE_THRESH):
    if back:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 256
        BLAZEFACE_INPUT_IMAGE_WIDTH = 256
    else:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
        BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    image = letterbox_convert(
        frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    face_detections = postprocess(
        preds_ailia, anchor_path, back=back, min_score_thresh=min_score_thresh)
    face_detections = face_detections[0]

    detections = []
    detections_eyes = []
    for i, d in enumerate(face_detections):
        # face position
        obj = ailia.DetectorObject(
            category=0, prob=1.0,
            x=d[1], y=d[0],
            w=d[3] - d[1], h=d[2] - d[0])
        detections.append(obj)

        # keypoints (left and right eye, stored as zero-size boxes)
        obj = ailia.DetectorObject(category=0, prob=1.0, x=d[4], y=d[5], w=0, h=0)
        detections_eyes.append(obj)
        obj = ailia.DetectorObject(category=0, prob=1.0, x=d[6], y=d[7], w=0, h=0)
        detections_eyes.append(obj)

    # revert the letterbox square to the original frame coordinates
    detections = reverse_letterbox(
        detections, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    detections_eyes = reverse_letterbox(
        detections_eyes, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))

    # convert to keypoints
    keypoints = []
    for i in range(len(detections_eyes) // 2):
        keypoint = {
            "eye_left_x": detections_eyes[i * 2 + 0].x,
            "eye_left_y": detections_eyes[i * 2 + 0].y,
            "eye_right_x": detections_eyes[i * 2 + 1].x,
            "eye_right_y": detections_eyes[i * 2 + 1].y,
        }
        keypoints.append(keypoint)

    return detections, keypoints

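# Hedged usage sketch for compute_blazeface_with_keypoint (detector and
# frame come from the surrounding context): coordinates are normalized to
# the frame, so scale by the frame width/height before drawing.
import cv2

detections, keypoints = compute_blazeface_with_keypoint(detector, frame)
h, w = frame.shape[:2]
for kp in keypoints:
    cv2.circle(frame, (int(kp["eye_left_x"] * w), int(kp["eye_left_y"] * h)),
               3, (0, 255, 0), -1)
    cv2.circle(frame, (int(kp["eye_right_x"] * w), int(kp["eye_right_y"] * h)),
               3, (0, 255, 0), -1)
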
def compare_video():
    # prepare base image
    fe_list = []

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # detector initialize
    if args.face == "yolov3":
        detector = ailia.Detector(
            FACE_MODEL_PATH,
            FACE_WEIGHT_PATH,
            1,
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id)
    else:
        detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=env_id)

    # web camera
    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[Error] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    # inference loop
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        h, w = frame.shape[0], frame.shape[1]

        # detect face
        if args.face == "yolov3":
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, YOLOV3_FACE_THRESHOLD, YOLOV3_FACE_IOU)
            count = detector.get_object_count()
        else:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.resize(
                img, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
            image = image.transpose((2, 0, 1))  # channel first
            image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
            input_data = image / 127.5 - 1.0

            # inference
            preds_ailia = detector.predict([input_data])

            # postprocessing
            detections = postprocess(preds_ailia)
            count = len(detections)

        for idx in range(count):
            # get detected face
            if args.face == "yolov3":
                obj = detector.get_object(idx)
                margin = 1.0
            else:
                obj = detections[idx]
                if len(obj) == 0:
                    continue
                d = obj[0]
                obj = ailia.DetectorObject(
                    category=0, prob=1.0,
                    x=d[1], y=d[0],
                    w=d[3] - d[1], h=d[2] - d[0])
                margin = 1.4

            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w * margin, obj.h * h * margin)
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int(fx + fw), int(fy + fh))

            # crop detected face
            crop_img = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], 0:3]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue
            crop_img, resized_frame = adjust_frame_size(
                crop_img, IMAGE_HEIGHT, IMAGE_WIDTH)

            # get matched face
            id_sim, score_sim = face_identification(fe_list, net, resized_frame)

            # display result
            fontScale = w / 512.0
            thickness = 2
            color = hsv_to_rgb(256 * id_sim / 16, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 2)
            text_position = (int(fx) + 4, int((fy + fh) - 8))
            cv2.putText(frame, f"{id_sim} : {score_sim:5.3f}",
                        text_position, cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, color, thickness)

        cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')

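# hsv_to_rgb above is assumed to be a small local helper that turns an HSV
# triple into a tuple usable as an OpenCV drawing color; a minimal sketch
# (note OpenCV packs uint8 hue into 0-179, so large hue values wrap):
import cv2
import numpy as np

def hsv_to_rgb(h, s, v):
    pixel = np.array([[[int(h) % 180, s, v]]], dtype=np.uint8)
    bgr = cv2.cvtColor(pixel, cv2.COLOR_HSV2BGR)[0][0]
    return int(bgr[0]), int(bgr[1]), int(bgr[2])
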