def recognize_from_video():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(args.savepath, IMAGE_HEIGHT, IMAGE_WIDTH)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        # get detected face
        if len(detections) == 0:
            crop_img = frame
        else:
            crop_img, top_left, bottom_right = crop_blazeface(
                detections[0], FACE_MARGIN, frame)
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                crop_img = frame

        # preprocess
        input_image, input_data = webcamera_utils.preprocess_frame(
            crop_img, IMAGE_HEIGHT, IMAGE_WIDTH, data_rgb=False)

        # inference
        preds_ailia = net.predict(input_data)[0]

        # postprocessing
        fig = gen_img_from_predsailia(input_data, preds_ailia)
        fig.savefig('tmp.png')
        img = cv2.imread('tmp.png')
        cv2.imshow('frame', img)

        # save results
        if writer is not None:
            img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
            writer.write(img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    os.remove('tmp.png')
    print('Script finished successfully.')
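# Optional refinement (a sketch, not part of the original sample): the
# tmp.png round trip above can be avoided by rendering the matplotlib
# figure into a NumPy buffer in memory. figure_to_bgr_image() below is a
# hypothetical helper; `fig` is any matplotlib Figure, such as the one
# returned by gen_img_from_predsailia().
import numpy as np
import cv2
from matplotlib.backends.backend_agg import FigureCanvasAgg


def figure_to_bgr_image(fig):
    # Draw the figure on an Agg canvas and copy its RGBA pixel buffer.
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    rgba = np.asarray(canvas.buffer_rgba())
    # Drop the alpha channel and reorder RGB -> BGR for OpenCV display.
    return cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGR)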
def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame
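# softmax() above comes from the sample's utility modules. A standard
# NumPy equivalent would look like the sketch below (assumption: the
# utility behaves like an ordinary numerically stable softmax).
import numpy as np


def softmax(x, axis=None):
    # Subtract the per-axis maximum for numerical stability, then normalize.
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)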
def recognize_from_video():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    if args.active_3d:
        logger.info('>>> 3D mode is activated!')
        depth_net = ailia.Net(DEPTH_MODEL_PATH, DEPTH_WEIGHT_PATH, env_id=args.env_id)
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning('[WARNING] currently video results output feature '
                       'is not supported in this model!')
        # TODO: shape should be debugged!
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    fig, axs = create_figure(active_3d=args.active_3d)

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        # get detected face
        if len(detections) == 0:
            crop_img = frame
        else:
            crop_img, top_left, bottom_right = crop_blazeface(
                detections[0], FACE_MARGIN, frame)
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                crop_img = frame

        # preprocess
        input_image, input_data = webcamera_utils.preprocess_frame(
            crop_img, IMAGE_HEIGHT, IMAGE_WIDTH, normalize_type='255')

        # inference
        preds_ailia = net.predict(input_data)
        pts, pts_img = get_preds_from_hm(preds_ailia)
        pts, pts_img = pts.reshape(68, 2) * 4, pts_img.reshape(68, 2)

        if args.active_3d:
            # 3D mode
            heatmaps = np.zeros((68, IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.float32)
            for i in range(68):
                if pts[i, 0] > 0:
                    heatmaps[i] = draw_gaussian(heatmaps[i], pts[i], 2)
            heatmaps = heatmaps[np.newaxis, :, :, :]
            depth_pred = depth_net.predict(
                np.concatenate((input_data, heatmaps), 1))
            depth_pred = depth_pred.reshape(68, 1)
            pts_img = np.concatenate((pts_img, depth_pred * 2), 1)

        resized_img = cv2.resize(
            cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB),
            (IMAGE_WIDTH, IMAGE_HEIGHT))

        # visualize results (clear axs at first)
        axs = visualize_results(
            axs, resized_img, pts_img, active_3d=args.active_3d)
        plt.pause(0.01)
        if not plt.get_fignums():
            break

        # save results
        # FIXME: How to save plt --> cv2.VideoWriter()
        # if writer is not None:
        #     # put pixel buffer in numpy array
        #     canvas = FigureCanvas(fig)
        #     canvas.draw()
        #     mat = np.array(canvas.renderer._renderer)
        #     res_img = cv2.cvtColor(mat, cv2.COLOR_RGB2BGR)
        #     writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
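# draw_gaussian() above paints a 2D Gaussian blob centered on a landmark
# into one heatmap channel before the depth network is run. The sketch
# below is a hedged approximation (assumption: behaviour comparable to the
# face-alignment reference helper; the utility shipped with the sample may
# clip and scale slightly differently).
import numpy as np


def draw_gaussian(image, point, sigma):
    h, w = image.shape
    cx, cy = int(point[0]), int(point[1])
    # Build a (size x size) Gaussian patch centered at (x0, y0).
    size = int(6 * sigma + 1)
    x = np.arange(size, dtype=np.float32)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    # Paste the patch into the heatmap, clipped to the image borders.
    left, top = max(cx - x0, 0), max(cy - y0, 0)
    right, bottom = min(cx - x0 + size, w), min(cy - y0 + size, h)
    if right <= left or bottom <= top:
        return image
    image[top:bottom, left:right] = np.maximum(
        image[top:bottom, left:right],
        g[top - (cy - y0):bottom - (cy - y0),
          left - (cx - x0):right - (cx - x0)])
    return image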
def recognize_from_video():
    # net initialize
    emotion_classifier = ailia.Classifier(
        EMOTION_MODEL_PATH,
        EMOTION_WEIGHT_PATH,
        env_id=args.env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_GRAY,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
    )
    gender_classifier = ailia.Classifier(
        GENDER_MODEL_PATH,
        GENDER_WEIGHT_PATH,
        env_id=args.env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_GRAY,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
    )
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath is not None:
        print('[WARNING] currently video results output feature '
              'is not supported in this model!')
        # TODO: shape should be debugged!
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        # WIP: FIXME: AiliaInvalidArgumentException error
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        for obj in detections:
            # get detected face
            crop_img, top_left, bottom_right = crop_blazeface(
                obj, FACE_MARGIN, frame
            )
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue
            crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2BGRA)

            # emotion inference
            emotion_classifier.compute(crop_img, EMOTION_MAX_CLASS_COUNT)
            count = emotion_classifier.get_class_count()
            print('=' * 80)
            print(f'emotion_class_count={count}')

            # print result
            emotion_text = ""
            for idx in range(count):
                print(f'+ idx={idx}')
                info = emotion_classifier.get_class(idx)
                print(
                    f'  category={info.category} ' +
                    f'[ {EMOTION_CATEGORY[info.category]} ]'
                )
                print(f'  prob={info.prob}')
                if idx == 0:
                    emotion_text = (f'[ {EMOTION_CATEGORY[info.category]} ] '
                                    f'prob={info.prob:.3f}')
            print('')

            # gender inference
            gender_text = ""
            gender_classifier.compute(crop_img, GENDER_MAX_CLASS_COUNT)
            count = gender_classifier.get_class_count()

            # print result
            for idx in range(count):
                print(f'+ idx={idx}')
                info = gender_classifier.get_class(idx)
                print(
                    f'  category={info.category} ' +
                    f'[ {GENDER_CATEGORY[info.category]} ]'
                )
                print(f'  prob={info.prob}')
                if idx == 0:
                    gender_text = (f'[ {GENDER_CATEGORY[info.category]} ] '
                                   f'prob={info.prob:.3f}')
            print('')

            # display label
            LABEL_WIDTH = 400
            LABEL_HEIGHT = 20
            color = (255, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
            cv2.rectangle(
                frame,
                top_left,
                (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
                color,
                thickness=-1,
            )

            text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
            color = (0, 0, 0)
            fontScale = 0.5
            cv2.putText(
                frame,
                emotion_text + " " + gender_text,
                text_position,
                cv2.FONT_HERSHEY_SIMPLEX,
                fontScale,
                color,
                1,
            )

        # show result
        cv2.imshow('frame', frame)
        time.sleep(SLEEP_TIME)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate emotion
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        emotion = predict(net, crop_img)

        idx = np.argmax(emotion)
        emotion = emotion_table[idx]

        # display label
        LABEL_WIDTH = bottom_right[1] - top_left[1]
        LABEL_HEIGHT = 20
        color = (255, 128, 128)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            emotion,
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )
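# predict() and emotion_table are defined elsewhere in the sample. For
# reference, the FER+ label set has eight classes; the table below is a
# plausible definition (assumption: ordering follows the standard FER+
# release, so verify against the sample's own table before reuse).
emotion_table = (
    'neutral', 'happiness', 'surprise', 'sadness',
    'anger', 'disgust', 'fear', 'contempt',
)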
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate age and gender
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = cv2.resize(crop_img, (IMAGE_SIZE, IMAGE_SIZE))
        img = np.expand_dims(img, axis=0)  # add batch dimension

        # inference
        output = net.predict([img])
        prob, age_conv3 = output
        prob = prob[0][0][0]
        age_conv3 = age_conv3[0][0][0][0]

        i = np.argmax(prob)
        gender = 'Female' if i == 0 else 'Male'
        age = round(age_conv3 * 100)

        # display label
        LABEL_WIDTH = bottom_right[1] - top_left[1]
        LABEL_HEIGHT = 20
        if gender == "Male":
            color = (255, 128, 128)
        else:
            color = (128, 128, 255)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            "{} {}".format(gender, age),
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )
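# A minimal driver loop (sketch, assumption: the same webcamera_utils
# helpers and args object as in the other samples) showing how
# recognize_from_frame() is intended to be called once per captured frame.
def recognize_from_video(net, detector):
    capture = webcamera_utils.get_capture(args.video)
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        # recognize_from_frame() draws the detections onto frame in place.
        recognize_from_frame(net, detector, frame)
        cv2.imshow('frame', frame)
    capture.release()
    cv2.destroyAllWindows()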