import os
import time

import cv2
import progressbar
from mtcnn.mtcnn import MTCNN

program_name = 'mtcnn_face_alignment'
input_filename = '1.no_motion_resize'
output_filename = '{:s}_{:s}'.format(input_filename, program_name)
video_input = './videos/{:s}.mp4'.format(input_filename)
video_output = './videos/result/{:s}.mp4'.format(output_filename)

if not os.path.exists('./videos/result/'):
    os.makedirs('./videos/result/')
if not os.path.exists('./csv/'):
    os.makedirs('./csv/')

detector = MTCNN()
vidin = cv2.VideoCapture(video_input)
ret, frame = vidin.read()
fps = vidin.get(cv2.CAP_PROP_FPS)
frames = vidin.get(cv2.CAP_PROP_FRAME_COUNT)
results = {}
print(' Video FPS rate is {}'.format(fps))
print(' {} total frames'.format(frames))
print(' Frame size : {}'.format(frame.shape))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
vidout = cv2.VideoWriter(video_output, fourcc, fps, (frame.shape[1], frame.shape[0]))
import cv2
from mtcnn.mtcnn import MTCNN

detector = MTCNN()

# cap = cv2.VideoCapture(0)
# while(True):
image = cv2.imread("ThanhABC_1.jpg")
# ret, image = cap.read()
result = detector.detect_faces(image)
img = image
for person in result:
    bounding_box = person['box']
    keypoints = person['keypoints']
    # cv2.rectangle(image,
    #               (bounding_box[0], bounding_box[1]),
    #               (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
    #               (0, 155, 255),
    #               2)
    # cv2.circle(image, (keypoints['left_eye']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['right_eye']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['nose']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['mouth_left']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['mouth_right']), 2, (0, 155, 255), 2)
    # crop the detected face out of the image
    img = image[bounding_box[1]:bounding_box[1] + bounding_box[3],
                bounding_box[0]:bounding_box[0] + bounding_box[2]]
cv2.imshow("image", img)
cv2.waitKey(0)  # wait for a key press so the window is actually displayed
import tensorflow as tf
import numpy as np
import os
import cv2
from mtcnn.mtcnn import MTCNN
from sklearn import cross_validation  # renamed to sklearn.model_selection in scikit-learn >= 0.20
from task4 import *

tf.enable_eager_execution()

if __name__ == '__main__':
    cap = cv2.VideoCapture(0)
    detector = MTCNN()
    model = CNN()
    model.load_weights(r"F:\python3\renlianshibie\CNNmodel")
    # DIR = r"F:\python3\renlianshibie\faceImageGray"
    # names_dict = name_dict(DIR)
    names_dict = {'0': 'huajinqing', '1': 'liangchunfu', '2': 'lijunyu',
                  '3': 'linjuncheng', '4': 'linweixin', '5': 'liujunhao',
                  '6': 'xuhaolin', '7': 'zenglingqi', '8': 'zhouyuanxiang',
                  '9': 'zhushichao'}
    print("Press z to exit the camera")
    while True:
        ret, frame = cap.read()  # read one frame from the camera
        z = detector.detect_faces(frame)
def __init__(self, path, optimize, minfacesize):
    from mtcnn.mtcnn import MTCNN  # lazy loading
    self._optimize = optimize
    self._minfacesize = minfacesize
    self._detector = MTCNN(min_face_size=minfacesize)
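A minimal usage sketch of this kind of lazily-importing wrapper follows, assuming OpenCV BGR input; the class name, the detect method, and the sample file name are assumptions, while the MTCNN(min_face_size=...) constructor and detect_faces call come from the snippet above.

import cv2

class LazyMTCNNDetector:
    def __init__(self, min_face_size=20):
        from mtcnn.mtcnn import MTCNN  # import deferred until a detector is actually needed
        self._detector = MTCNN(min_face_size=min_face_size)

    def detect(self, bgr_image):
        # mtcnn expects RGB input; OpenCV reads BGR
        rgb = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        # each result is a dict with 'box', 'confidence' and 'keypoints'
        return self._detector.detect_faces(rgb)

if __name__ == '__main__':
    img = cv2.imread('sample.jpg')  # hypothetical test image
    for face in LazyMTCNNDetector(min_face_size=40).detect(img):
        x, y, w, h = face['box']
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)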
from matplotlib import pyplot as plt
from facemodel import face_recognition
import cv2
from mtcnn.mtcnn import MTCNN

detector = MTCNN()
cap = cv2.VideoCapture(0)

# Default resolutions of the frame are obtained. The default resolutions are system dependent.
# We convert the resolutions from float to integer.
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

# Define the codec and create VideoWriter object. The output is stored in the 'outpy.avi' file.
out = cv2.VideoWriter('outpy.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                      (frame_width, frame_height))

while True:
    # Capture frame-by-frame
    __, frame = cap.read()
    # rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rgb = frame
    # Use MTCNN to detect faces
    result = detector.detect_faces(rgb)
    if result != []:
        for face in result:
            bounding_box = face['box']
            # keypoints = face['keypoints']
            x, y, w, h = bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3]
            rect_face = cv2.rectangle(frame, (x, y), (x + w, y + h), (46, 204, 113), 2)
    '--camera',
    default=0,
    type=int,
    help='Select the camera using the port with the command "ls -ltrh /dev/video*".')
parser.add_argument('--video', default=None, type=str, help='Select the video path')
args = parser.parse_args()

# Network initialisation
detector = MTCNN()

# Selection between camera and video, assign the cap variable to it
if args.video is None:
    cap = cv2.VideoCapture(args.camera)
else:
    cap = cv2.VideoCapture(args.video)

previous = 0
while True:
    # Capture frame-by-frame
    __, frame = cap.read()
    start = time.time()
class VideoCamera(object):
    def __init__(self, model, graph):
        global video
        self.video = cv2.VideoCapture(0)
        self.gender_model = model
        self.graph = graph
        self.detector = MTCNN()

    def __del__(self):
        global video
        self.video.release()

    def process_img(self, face):
        image = Image.fromarray(face)
        image = image.resize((224, 224))
        face_array = np.asarray(image)
        face_array = face_array.reshape(1, 224, 224, 3)
        with self.graph.as_default():
            gen = self.gender_model.predict(face_array)
        if gen[0][0] == 1:
            text = "MALE"
        else:
            text = "FEMALE"
        return text

    def get_frame(self):
        global video
        # grabs webcam image
        ret, frame = self.video.read()
        # DO WHAT YOU WANT WITH TENSORFLOW / KERAS AND OPENCV
        # print(frame.shape)
        # detects faces with mtcnn; if no face is detected, the except clause
        # catches the error and returns the plain webcam image
        try:
            results = self.detector.detect_faces(frame)
        except:
            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
        if len(results) == 0:
            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
        # getting outline of face
        imshape = frame.shape
        x1, y1, width, height = results[0]['box']
        a, b, c, d = x1, y1, (x1 + width), (y1 + height)  # box for display only
        # x1 = int(0.5*x1)
        # y1 = int(0.5*y1)
        x2, y2 = x1 + width, y1 + height
        # x2 = int(x2+0.5*(imshape[1]-x2))
        # y2 = int(y2+0.5*(imshape[0]-y2))
        # cropping to face
        face = frame[y1:y2, x1:x2]
        # about bounding box
        text = self.process_img(face)
        cv2.rectangle(frame, (a, b), (c, d), (0, 255, 0), 2)
        cv2.putText(frame, text, (5, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0),
                    lineType=cv2.LINE_AA)
        ret, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()
class FaceDetector: # constant locators for landmarks jaw_points = np.arange(0, 17) # face contour points eyebrow_dx_points = np.arange(17, 22) eyebrow_sx_points = np.arange(22, 27) nose_points = np.arange(27, 36) nosecenter_points = np.array([30, 33]) right_eye = np.arange(36, 42) left_eye = np.arange(42, 48) def __init__(self, config): self.config = config # if specified use mtcnn model self.mtcnn_model_path = config.get('mtcnn_model_path', None) if self.mtcnn_model_path: from mtcnn.mtcnn import MTCNN self.detector = MTCNN() # otherwise rely on dlib detector else: self.detector_model_path = config.get('detector_path', None) if self.detector_model_path: self.detector = dlib.cnn_face_detection_model_v1( self.detector_model_path) else: self.detector = dlib.get_frontal_face_detector() # always instantiate predictor self.predictor = dlib.shape_predictor( config.get('shape_predictor_path')) def _mtcnn_detect_faces(self, img): face_confidence_threshold = self.config['mtcnn_confidence_threshold'] rects = self.detector.detect_faces(img) faces = [ Face( img.copy(), # output bbox coordinate of MTCNN is [x, y, width, height] # need to max to 0 cause sometimes bbox has negative values ??library BUG Face.Rectangle(left=max(r['box'][0], 0), top=max(r['box'][1], 0), right=max(r['box'][0], 0) + max(r['box'][2], 0), bottom=max(r['box'][1], 0) + max(r['box'][3], 0))) for r in rects if r['confidence'] > face_confidence_threshold ] return faces def detect_faces(self, img): if self.mtcnn_model_path: faces = self._mtcnn_detect_faces(img) else: rects = self.detector(img, 1) # if using custom detector we need to get the rect attribute if self.detector_model_path: rects = [r.rect for r in rects] faces = [ Face( img.copy(), Face.Rectangle(top=max(r.top(), 0), right=max(r.right(), 0), bottom=max(r.bottom(), 0), left=max(r.left(), 0))) for r in rects ] # continue only if we detected at least one face if len(faces) == 0: logging.debug("No face detected") raise FaceExtractException("No face detected.") for face in faces: face.landmarks = self.get_landmarks(face) return faces def get_landmarks(self, face: Face, recompute=False): # If landmarks already present, just return, unless is required to recompute them if face.landmarks is not None and not recompute: return face.landmarks else: # we need a dlib rectangle to get the landmarks dlib_rect = dlib.rectangle(left=face.rect.left, top=face.rect.top, right=face.rect.right, bottom=face.rect.bottom) shape = self.predictor(face.img, dlib_rect) return np.array([(p.x, p.y) for p in shape.parts()]) @staticmethod def get_eyes(face: Face): lx_eye = face.landmarks[FaceDetector.left_eye] rx_eye = face.landmarks[FaceDetector.right_eye] return lx_eye, rx_eye @staticmethod def get_contour_points(shape): # shape to numpy points = np.array([(p.x, p.y) for p in shape.parts()]) face_boundary = points[np.concatenate([ FaceDetector.jaw_points, FaceDetector.eyebrow_dx_points, FaceDetector.eyebrow_sx_points ])] return face_boundary, shape.rect def extract_face(self, face: Face): """ Utility method which uses directly the current detector configuration for the generic extraction operation :param face: :return: """ # size is a tuple, so need to eval from string representation in config size = literal_eval(self.config['extract']['size']) border_expand = literal_eval(self.config['extract']['border_expand']) align = self.config['extract']['align'] maintain_proportion = self.config['extract']['maintain_proportion'] masked = self.config['extract']['masked'] return self._extract_face(face, size, 
border_expand=border_expand, align=align, maintain_proportion=maintain_proportion, masked=masked) def _extract_face(self, face: Face, out_size=None, border_expand=(0., 0.), align=False, maintain_proportion=False, masked=False): face_size = face.get_face_size() border_expand = (int(border_expand[0] * face_size[0]), int(border_expand[1] * face_size[1])) # if not specified otherwise, we want extracted face size to be exactly as input face size if not out_size: out_size = face_size face.landmarks = self.get_landmarks(face) if masked: mask = utils.get_face_mask( face, 'hull', erosion_size=literal_eval(self.config['extract'].get( 'dilation_kernel', 'None')), dilation_kernel=literal_eval(self.config['extract'].get( 'dilation_kernel', 'None')), blur_size=int(self.config['extract']['blur_size'])) # black all pixels outside the mask face.img = cv2.bitwise_and(face.img, face.img, mask=mask[:, :, 1]) # keep proportions of original image (rect) for extracted image, otherwise resize might stretch the content if maintain_proportion: border_delta = self._get_maintain_proportion_delta( face_size, out_size) border_expand = (border_expand[0] + int(border_delta[0] // 2), border_expand[1] + int(border_delta[1] // 2)) if align: cut_face = utils.ffhq_align(face, output_size=out_size[0], boundary_resize_factor=border_expand) #cut_face, _ = utils.align_face(face, boundary_resize_factor=border_expand) #cut_face = utils._align_face(face, size=out_size) else: cut_face = cv2.resize(face.get_face_img(), out_size, interpolation=cv2.INTER_CUBIC) return cut_face def _get_maintain_proportion_delta(self, src_size, dest_size): """ Return delta amount to maintain destination proportion given source size. Tuples order is (w, h) :param base_border: :param src_size: :param dest_size: :return: """ dest_ratio = max(dest_size) / min(dest_size) delta_h = delta_w = 0 w, h = src_size if w > h: delta_h = w * dest_ratio - h else: delta_w = h * dest_ratio - w return delta_w, delta_h
def get_frame(self): _, fr = self.video.read() # fr = imutils.resize( fr, width=400 ) # # modelFile = "res10_300x300_ssd_iter_140000.caffemodel" # configFile = "deploy.txt" # net = cv2.dnn.readNetFromCaffe(configFile, modelFile ) # (h, w) = fr.shape[:2] # blob = cv2.dnn.blobFromImage(cv2.resize( fr, (300, 300)), 1.0, # (300, 300), (104.0, 177.0, 123.0) ) # net.setInput( blob ) # detections = net.forward() # # loop over the detections # for i in range( 0, detections.shape[2] ): # # extract the confidence (i.e., probability) associated with the # # prediction # confidence = detections[0, 0, i, 2] # # filter out weak detections by ensuring the `confidence` is # # greater than the minimum confidence # if confidence < 0.75: # continue # # compute the (x, y)-coordinates of the bounding box for the # # object # box = detections[0, 0, i, 3:7] * np.array( [w, h, w, h] ) # (startX, startY, endX, endY) = box.astype( "int" ) # # # draw the bounding box of the face along with the associated # # probability # text = "{:.2f}%".format( confidence * 100 ) # y = startY - 10 if startY - 10 > 10 else startY + 10 # cv2.rectangle( fr, (startX, startY), (endX, endY), # (0, 0, 255), 2 ) # cv2.putText( fr, text, (startX, y), # cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2 ) # # pixels = np.asarray(fr) detector = MTCNN() result = detector.detect_faces(pixels) if result: for person in result: bounding_box = person['box'] cv2.rectangle(fr, (bounding_box[0], bounding_box[1]), (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]), (0, 155, 255), 2) fc = pixels[bounding_box[1]:bounding_box[1] + bounding_box[3], bounding_box[0]:bounding_box[0] + bounding_box[2]] print(fc) roi = cv2.resize(fc, (224, 224)) print(roi) with session.graph.as_default(): k.backend.set_session(session) pred = model.predict_emotion(roi[np.newaxis, :, :]) cv2.putText(fr, pred, (bounding_box[0], bounding_box[1]), font, 2, (0, 0, 255), 3) # # # # cv2.rectangle( fr, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2 ) _, jpeg = cv2.imencode('.jpg', fr) return jpeg.tobytes()
class FacenetEngine(object): """ facenet engine class """ # クラス変数 # encodeのベクトルサイズ = 128, それぞれの値は-2~2である。 # そのため、max_distance = 11.3 __distance_threshold = 11.0 __debug_mode = True __bound = 18 __encode_features_vector_length = 128 def __init__(self): """ コンストラクト """ cur_dir = os.path.abspath(os.path.join(os.path.dirname(__file__))) # facenet model path model_path = "./model/keras-facenet/model/facenet_keras.h5" model_path = os.path.join(cur_dir, model_path) # モデルパスの存在チェック if osp.exists(model_path) is False: raise ValueError('{} not Exist'.format(model_path)) # set for facenet model self.__model_path = model_path self.model = load_model(self.__model_path, compile=False) # transfer learning model self.transfer_model = self.make_transfer_learning_model() print(self.transfer_model.input) print(self.transfer_model.output) # create new detector, using default weights from mtcnn self.__detector = MTCNN() # set classifier model classifier_filename = "./model/SVM_classifer.pkl" classifier_filename = os.path.join(cur_dir, classifier_filename) self.__classifier_filename = classifier_filename # TODO: remove these self.__data_set_path = "../../../dataset/train/japanese/" sub_dirs = glob(self.__data_set_path + '*/') self.__people = [os.path.dirname(sub_dir) for sub_dir in sub_dirs] # Get current data from DB self.all_anchors = db_util.get_all_encode() # -------------------------------------------------------------------------------- # Public function # -------------------------------------------------------------------------------- def recognize(self, image_path, image_data=None): """ 指定するイメージを認証する :param image_path: :return: name """ errcode, name, user_id, department = 0, 'Unknown', -1, 'Unknown' # check arguments if image_data is None: if osp.exists(image_path) is False: raise ValueError('file not found {}'.format(image_path)) # make encode from image_path if image_data is None: errcode, img_encode = self.make_encode(image_path) else: errcode, img_encode = self.make_encode(image_path, image_data=image_data) if errcode is 0: # get all encodes fromdb all_anchors = self.all_anchors distances = list() for anchor in all_anchors: if len(anchor['encode']) == FacenetEngine.__encode_features_vector_length: distances.append(distance.euclidean(img_encode, anchor['encode'])) if FacenetEngine.__debug_mode is True: print('img_coode = {}'.format(type(img_encode))) print("length of all encode in database: {}".format(len(all_anchors))) print("min of distances = {}".format(min(distances))) print("max of distances = {}".format(max(distances))) if np.max(distances) < FacenetEngine.__bound: if min(distances) < FacenetEngine.__distance_threshold: anchor_idx = distances.index(min(distances)) name = all_anchors[anchor_idx]['name'] user_id = all_anchors[anchor_idx]['id'] department = all_anchors[anchor_idx]['department'] else: print(distances) print(FacenetEngine.__distance_threshold) if FacenetEngine.__debug_mode is True: print('name = {}, id = {}, department = {}'.format(name, user_id, department)) return errcode, name, user_id, department ''' Training SVM ''' def extract_face(self, file_path, image_data=None, required_size=(160, 160)): """ extract face for further steps Calling:: faces = extract_face(file_path) Args:: _ filename: path of images file _ require_size: required size of training model Returns:: _ face_array: Numpy array contains bounding box information - Details:: - get_trained_data """ errcode, face_array = 0, np.array([]) if image_data is None: # load image from file image = Image.open(file_path) else: 
image = image_data # convert to RGB, if needed img = image.convert('RGB') # conver to array pixels = asarray(img) # detect faces in the image results = self.__detector.detect_faces(pixels) if len(results) < 1: errcode = -1 else: # extract the bounding box x1, y1, width, height = results[0]['box'] # resize pixels to the model size x1, y1 = abs(x1), abs(y1) x2, y2 = x1 + width, y1 + height # extract the face face = pixels[y1:y2, x1:x2] # TODO: Debug時に、以下の画像を出力する。 # cv2.imwrite("check.jpg", face) # resize pixels to required size of further steps img = Image.fromarray(face) img = img.resize(required_size) face_array = asarray(img) return errcode, face_array def extract_face_for_preprocessing(self, file_path, required_size=(160, 160)): """ extract face for further steps Calling:: faces = extract_face(file_path) Args:: _ filename: path of images file _ require_size: required size of training model Returns:: _ face_array: Numpy array contains bounding box information - Details:: - get_trained_data """ errcode, face_array = 0, np.array([]) # load image from file image = Image.open(file_path) # convert to RGB, if needed img = image.convert('RGB') # conver to array pixels = asarray(img) # detect faces in the image results = self.__detector.detect_faces(pixels) if len(results) < 1: errcode = -1 else: # extract the bounding box x1, y1, width, height = results[0]['box'] # resize pixels to the model size x1, y1 = abs(x1), abs(y1) x2, y2 = x1 + width, y1 + height # extract the face face = pixels[y1:y2, x1:x2] # resize pixels to required size of further steps img = Image.fromarray(face) img = img.resize(required_size) face_array = asarray(img) return errcode, face_array def load_data_set(self, require_size=(160, 160)): """ Load face locations from data_set Calling:: faces = load_faces(directory) Args:: - Returns:: - asarray (X): Numpy array contains bounding box information for face position - asarray(Y): Numpy array contains labels Raises:: - Details:: - Load face locations from data_set """ X, Y = list(), list() # enumerate folders, on per class for subdir in listdir(self.__data_set_path): faces = list() # path path = self.__data_set_path + subdir + '/' # skip any files that might be in the dir if not isdir(path): continue for name in listdir(path): file_path = path + name print(file_path) # extract face face = self.extract_face(file_path) faces.append(face) # create labels labels = [subdir for _ in range(len(faces))] # summarize progress print('>loaded %d examples for class: %s' % (len(faces), subdir)) # storing faces X.extend(faces) Y.extend(labels) return asarray(X), asarray(Y) def convert(self, faces): """ Load faces dataset (160, 160, 3) to encode into embedding 128d vector """ new = list() # Training dataset # Convert each face to an encoding for face in faces: embed = self.encoding(self.model, face) new.append(embed) new = np.asarray(new) # Checking new dataset dimemsion return new @staticmethod def encoding(model, faces): """ Load facenet pretrained model and encoding using predict function of Keras """ # Scale pixel values faces = faces.astype('float32') # Standardize pixel value across channels (global) mean, std = faces.mean(), faces.std() faces = (faces - mean) / std # Transform face into one sample samples = np.expand_dims(faces, axis=0) # Make prediction to get encoding Y_hat = model.predict(samples) # TODO: Normalizationが必要かどうかを要検討 # Y_hat_norm = [((i - min(Y_hat[0])) / (max(Y_hat[0]) - min(Y_hat[0]))) for i in Y_hat[0]] return Y_hat[0] @staticmethod def l2_normalizer(x, axis=-1, 
epsilon=1e-10): """ 標準化 """ output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon)) return output def make_faces_encoding_labels(self): """ 画像より128次元の特徴値に変換する """ faces, labels = self.load_data_set() print(faces.shape) # データ数, 160, 160, 3 print(labels.shape) # データ数 # Encoding faces faces_encoding = self.convert(faces) print(faces_encoding.shape) # データ数, 128 # Normalize faces_encoding = self.l2_normalizer(faces_encoding) return faces_encoding, labels def train(self): """ Train SVM model on given dataset """ encodes = db_util.get_all_encode() faces_encoding = [] labels = [] for encode in encodes: if len(encode['encode']) == FacenetEngine.__encode_features_vector_length: faces_encoding.append(encode['encode']) labels.append(encode['id']) else: print("length is not {} encode: {}".format(FacenetEngine.__encode_features_vector_length, len(encode['encode']))) # Label encode targets encoder = LabelEncoder() encoder.fit(labels) normalized_labels = encoder.transform(labels) normalized_labels = np.array(normalized_labels) faces_encoding = np.array(faces_encoding) # Fit into SVM model model = SVC(kernel='linear', probability=True) model.fit(faces_encoding, normalized_labels) joblib.dump(model, self.__classifier_filename) print('Save') def preprocessing(self, input_folder, output_folder): """ Extract face from input image and save as output image Args: - input_folder(str) : path of input data folder (will process all image in all sub dir of input folder) - output_folder(str) : path of output data folder (output folder structure is the same as structure of input data folder) Details: - input images size : any - output images size : 160*160*3 (RGB) """ for cur, dirs, _ in os.walk(input_folder): for sub_dir in dirs: for curDir, subDirs, files in os.walk(os.path.join(input_folder, sub_dir)): for file in files: file_path = os.path.join(curDir, file) filename, file_extension = os.path.splitext(file_path) out_path = os.path.join(output_folder, sub_dir) if not os.path.exists(out_path): os.mkdir(out_path) output_file_path = os.path.join(out_path, file) if 'jpeg' in file_extension: errcode, face = self.extract_face_for_preprocessing(file_path) if errcode is 0: try: pil_img = Image.fromarray(face) pil_img.save(output_file_path) except Exception as e: print("process image {} get error {}".format(file, e)) else: print("process image {} get error when extract face".format(file)) def make_transfer_learning_model(self): """ making transfer learning model from facenet input: 160,160,3 output: 128 """ model = self.model # Freeze the layers for layer in model.layers[:424]: layer.trainable = False model.layers.pop() # Adding custom Layers x = model.layers[-1].output predictions = Dense(26, activation="softmax", kernel_regularizer=regularizers.l2(0.01))(x) # creating the final model model_final = Model(input=model.input, output=predictions) return model_final def transfer_learning(self, train_data_dir, validation_data_dir, epochs): # compile the model self.transfer_model.compile(loss="categorical_crossentropy", optimizer=optimizers.SGD(lr=1e-3, momentum=0.9), metrics=["accuracy"]) # Initiate the train and test generators with data Augumentation # Save the model according to the conditions checkpoint = ModelCheckpoint("facenet_transfer_weight.h5", monitor='val_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='auto', period=1) early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=100, verbose=1, mode='auto') temp_path = os.path.join(os.getcwd(), "temp") 
train_data_path = os.path.join(temp_path, "train") val_data_path = os.path.join(temp_path, "val") # doing preprocessing when temp dir not exist if not os.path.exists(temp_path): os.mkdir(temp_path) if not os.path.exists(train_data_path): os.mkdir(train_data_path) if not os.path.exists(val_data_path): os.mkdir(val_data_path) self.preprocessing(train_data_dir, train_data_path) self.preprocessing(validation_data_dir, val_data_path) train_datagen = ImageDataGenerator( featurewise_center=True, featurewise_std_normalization=True, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True) test_datagen = ImageDataGenerator(rescale=1. / 255) train_generator = train_datagen.flow_from_directory( train_data_path, target_size=(160, 160), batch_size=32, class_mode="categorical") validation_generator = test_datagen.flow_from_directory( val_data_path, target_size=(160, 160), class_mode="categorical") # Train the model history = self.transfer_model.fit_generator( train_generator, steps_per_epoch=2, epochs=epochs, validation_data=validation_generator, validation_steps=2, callbacks=[checkpoint, early]) return history ''' Predicting ''' def make_encode(self, input_image, image_data=None): """ Make embedding vector (128-dimensions) from one image """ errcode, embed, face_img_receiver_mode = 0, np.array([]), True if image_data is None: errcode, face = self.extract_face(input_image) else: # TODO: (CongThanh) Consider when merge with facenet_engine.py # Add-in for face data receiver # NOTE: if image shape equals to face shape if face_img_receiver_mode: img = image_data.convert('RGB') # conver to array pixels = asarray(img) faces = [] faces.append(pixels) embed = self.convert(faces) else: errcode, face = self.extract_face(input_image, image_data=image_data) if errcode is 0: faces = [] faces.append(face) embed = self.convert(faces) return errcode, embed def predict(self, input_image): """ Predicting the class of input image using pretrained model on Japanese dataset """ errcode, predictions = 0, None errcode, embed = self.make_encode(input_image) if errcode is 0: model = joblib.load(self.__classifier_filename) predictions = model.predict_proba(embed) return errcode, predictions
import cv2
from sort import *
from util import *

# vid = cv2.VideoCapture(0)
vid = cv2.VideoCapture('test.mp4')
video_frame_cnt = int(vid.get(7))
video_width = int(vid.get(3))
video_height = int(vid.get(4))
video_fps = int(vid.get(5))

record_video = True
# record_video = False
if record_video:
    # for writing video
    out = cv2.VideoWriter('data/outvideo.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          video_fps, (video_width, video_height))

face_detector = MTCNN()          # Initializing MTCNN detector object
face_tracker = Sort(max_age=50)  # Initializing SORT tracker object

ret, frame = vid.read()
while ret:
    try:
        ret, frame = vid.read()
        original_frame = frame.copy()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = face_detector.detect_faces(frame)
        min_confidence = 0.4
        box = []
        for i in range(len(result)):
            box_ = result[i]["box"]
from mtcnn.mtcnn import MTCNN
import cv2
import dlib
import numpy as np
import os

detector1 = MTCNN()
detector2 = dlib.get_frontal_face_detector()
modelFile = "models/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "models/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
classifier2 = cv2.CascadeClassifier('models/haarcascade_frontalface2.xml')

images = os.listdir('faces')
# os.makedirs('faces/dlib')
# os.makedirs('faces/mtcnn')
# os.makedirs('faces/dnn')
# os.makedirs('faces/haar')

for image in images:
    img = cv2.imread(os.path.join('faces', image))
    # img = cv2.resize(img, None, fx=2, fy=2)
    height, width = img.shape[:2]
    img1 = img.copy()
    img2 = img.copy()
    img3 = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # detect faces in the image
    faces1 = detector1.detect_faces(img_rgb)
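The example stops after the MTCNN call, but it also sets up a dlib HOG detector, an OpenCV DNN face model, and a Haar cascade. A sketch of how those three are commonly invoked inside the same per-image loop follows; the confidence threshold and detectMultiScale parameters are assumptions, not values from the original script.

    # sketch: the remaining three detectors, continuing inside the per-image loop above
    faces2 = detector2(gray, 1)  # dlib HOG detector returns dlib.rectangle objects
    for rect in faces2:
        cv2.rectangle(img1, (rect.left(), rect.top()), (rect.right(), rect.bottom()),
                      (0, 255, 0), 2)

    # OpenCV DNN face detector (ResNet-10 SSD); output shape is (1, 1, N, 7)
    blob = cv2.dnn.blobFromImage(cv2.resize(img2, (300, 300)), 1.0, (300, 300),
                                 (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    for i in range(detections.shape[2]):
        if detections[0, 0, i, 2] > 0.5:  # assumed confidence threshold
            box = (detections[0, 0, i, 3:7] * np.array([width, height, width, height])).astype(int)
            cv2.rectangle(img2, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)

    # Haar cascade; scaleFactor and minNeighbors are assumed defaults
    faces4 = classifier2.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
    for (x, y, w, h) in faces4:
        cv2.rectangle(img3, (x, y), (x + w, y + h), (255, 0, 0), 2)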
# Load pretrained Inception-ResNet-v1 model
# Update model and weights paths according to your working environment
model_path = "Models/Inception_ResNet_v1.json"
weights_path = "Models/facenet_keras_weights.h5"
# weights_path = "enc1_model_weights.h5"

json_file = open(model_path, 'r')
loaded_model_json = json_file.read()
json_file.close()
print(loaded_model_json)

enc_model = model_from_json(loaded_model_json)
enc_model.load_weights(weights_path)

mtcnn_detector = MTCNN()


class Ui_Form():
    lastLabel = ""

    def setupUi(self, Form):
        Form.setObjectName("Nhan Dien Khuon Mat")
        Form.resize(1100, 768)
        self.videoCapture = QtWidgets.QLabel(Form)
        self.videoCapture.setGeometry(QtCore.QRect(300, 120, 640, 480))
        self.videoCapture.setFrameShape(QtWidgets.QFrame.Box)
        self.videoCapture.setFrameShadow(QtWidgets.QFrame.Raised)
        self.videoCapture.setLineWidth(6)
        self.videoCapture.setText("")
        self.videoCapture.setObjectName("videoCapture")
# Give the image link
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/Channing_Tatum_by_Gage_Skidmore_3.jpg/330px-Channing_Tatum_by_Gage_Skidmore_3.jpg"

# Open the link and save the image to res
res = request.urlopen(url)

# Read the res object and convert it to an array
img = np.asarray(bytearray(res.read()), dtype='uint8')

# Add the color variable
img = cv2.imdecode(img, cv2.IMREAD_COLOR)

# Show the image
cv2_imshow(img)

"""# Step 2: Face detection"""

# Initialize mtcnn detector
detector = MTCNN()

# set face extraction parameters
target_size = (224, 224)  # output image size
border_rel = 0            # increase or decrease zoom on image

# detect faces in the image
detections = detector.detect_faces(img)
print(detections)

x1, y1, width, height = detections[0]['box']
dw = round(width * border_rel)
dh = round(height * border_rel)
x2, y2 = x1 + width + dw, y1 + height + dh
face = img[y1:y2, x1:x2]
with open('persons.txt', 'r') as f:
    persons = f.readlines()


def find_boxes(faces):
    # convert MTCNN (x, y, width, height) boxes into the (top, right, bottom, left)
    # order expected by face_recognition.face_encodings
    boxes = []
    for result in faces:
        if result['confidence'] > .9:
            x, y, width, height = result['box']
            x_max = x + width
            y_max = y + height
            boxes.append((y, x_max, y_max, x))
    return boxes


cap = cv2.VideoCapture(0)
detector = MTCNN()

while True:
    ret, img = cap.read()
    if ret == True:
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(rgb)
        boxes = find_boxes(faces)
        t = time.time()
        embeddings = face_recognition.face_encodings(rgb, boxes, num_jitters=1)
        for i, embedding in enumerate(embeddings):
            matches = []
            for person in persons:
                match = face_recognition.compare_faces(embedding_dict[person.rstrip()],
                                                       embedding, tolerance=.55)
                matches.append(sum(match))
            cv2.rectangle(img, (boxes[i][3], boxes[i][0]), (boxes[i][1], boxes[i][2]),
                          (255, 0, 0), 2)
# define store path
store_root_dir = ".\\result"
store_image_dir = os.path.join(store_root_dir, "CACD2000")
if os.path.exists(store_image_dir) is False:
    os.makedirs(store_image_dir)

# define some parameters for mtcnn
src = np.array([[30.2946, 51.6963],
                [65.5318, 51.5014],
                [48.0252, 71.7366],
                [33.5493, 92.3655],
                [62.7299, 92.2041]], dtype=np.float32)
threshold = [0.6, 0.7, 0.9]
factor = 0.85
minSize = 20
imgSize = [120, 100]
detector = MTCNN(steps_threshold=threshold, scale_factor=factor, min_face_size=minSize)

# align, crop and resize
keypoint_list = ['left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right']
for filename in tqdm(os.listdir(image_root_dir)):
    dst = []
    filepath = os.path.join(image_root_dir, filename)
    storepath = os.path.join(store_image_dir, filename)
    npimage = np.array(Image.open(filepath))
    # Image.fromarray(npimage.astype(np.uint8)).show()
    dictface_list = detector.detect_faces(npimage)
    # if more than one face is detected, [0] means choose the first face
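The loop above collects the five detected keypoints so they can be mapped onto the reference template src. A sketch of how that alignment step typically continues inside the same loop follows; the similarity-transform estimation via cv2.estimateAffinePartial2D (and the assumption that cv2 is imported) is mine, not the original script's code.

    # sketch: align the first detected face onto the five-point template (assumed continuation)
    if len(dictface_list) > 0:
        keypoints = dictface_list[0]['keypoints']
        dst = np.array([keypoints[name] for name in keypoint_list], dtype=np.float32)
        # rotation + uniform scale + translation mapping detected points onto src
        matrix, _ = cv2.estimateAffinePartial2D(dst, src)
        # imgSize is (height, width); warpAffine expects (width, height)
        aligned = cv2.warpAffine(npimage, matrix, (imgSize[1], imgSize[0]))
        Image.fromarray(aligned.astype(np.uint8)).save(storepath)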
def main(): weight_file = "../pre-trained/megaface_asian/ssrnet_3_3_3_64_1.0_1.0/ssrnet_3_3_3_64_1.0_1.0.h5" # for face detection # detector = dlib.get_frontal_face_detector() detector = MTCNN() try: os.mkdir('./img') except OSError: pass # load model and weights img_size = 64 stage_num = [3, 3, 3] lambda_local = 1 lambda_d = 1 model = SSR_net(img_size, stage_num, lambda_local, lambda_d)() model.load_weights(weight_file) clip = VideoFileClip(sys.argv[1]) # can be gif or movie #python version pyFlag = '' if len(sys.argv) < 3: pyFlag = '2' #default to use moviepy to show, this can work on python2.7 and python3.5 elif len(sys.argv) == 3: pyFlag = sys.argv[2] #python version else: print('Wrong input!') sys.exit() img_idx = 0 detected = '' #make this not local variable time_detection = 0 time_network = 0 time_plot = 0 ad = 0.4 skip_frame = 5 # every 5 frame do 1 detection and network forward propagation for img in clip.iter_frames(): img_idx = img_idx + 1 input_img = img #using python2.7 with moivepy to show th image without channel flip if pyFlag == '3': input_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_h, img_w, _ = np.shape(input_img) input_img = cv2.resize(input_img, (1024, int(1024 * img_h / img_w))) img_h, img_w, _ = np.shape(input_img) if img_idx == 1 or img_idx % skip_frame == 0: # detect faces using dlib detector start_time = timeit.default_timer() detected = detector.detect_faces(input_img) elapsed_time = timeit.default_timer() - start_time time_detection = time_detection + elapsed_time faces = np.empty((len(detected), img_size, img_size, 3)) for i, d in enumerate(detected): print(i) print(d['confidence']) if d['confidence'] > 0.95: x1, y1, w, h = d['box'] x2 = x1 + w y2 = y1 + h xw1 = max(int(x1 - ad * w), 0) yw1 = max(int(y1 - ad * h), 0) xw2 = min(int(x2 + ad * w), img_w - 1) yw2 = min(int(y2 + ad * h), img_h - 1) cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2) # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2) faces[i, :, :, :] = cv2.resize( input_img[yw1:yw2 + 1, xw1:xw2 + 1, :], (img_size, img_size)) start_time = timeit.default_timer() if len(detected) > 0: # predict ages and genders of the detected faces results = model.predict(faces) predicted_ages = results # draw results for i, d in enumerate(detected): if d['confidence'] > 0.95: x1, y1, w, h = d['box'] label = "{}".format(int(predicted_ages[i])) draw_label(input_img, (x1, y1), label) elapsed_time = timeit.default_timer() - start_time time_network = time_network + elapsed_time start_time = timeit.default_timer() if pyFlag == '2': img_clip = ImageClip(input_img) img_clip.show() cv2.imwrite('img/' + str(img_idx) + '.png', cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)) elif pyFlag == '3': cv2.imshow("result", input_img) cv2.imwrite('img/' + str(img_idx) + '.png', cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)) elapsed_time = timeit.default_timer() - start_time time_plot = time_plot + elapsed_time else: for i, d in enumerate(detected): if d['confidence'] > 0.95: x1, y1, w, h = d['box'] x2 = x1 + w y2 = y1 + h xw1 = max(int(x1 - ad * w), 0) yw1 = max(int(y1 - ad * h), 0) xw2 = min(int(x2 + ad * w), img_w - 1) yw2 = min(int(y2 + ad * h), img_h - 1) cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2) # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2) faces[i, :, :, :] = cv2.resize( input_img[yw1:yw2 + 1, xw1:xw2 + 1, :], (img_size, img_size)) # draw results for i, d in enumerate(detected): if d['confidence'] > 0.95: x1, y1, w, h = d['box'] label = "{}".format(int(predicted_ages[i])) 
draw_label(input_img, (x1, y1), label) start_time = timeit.default_timer() if pyFlag == '2': img_clip = ImageClip(input_img) img_clip.show() elif pyFlag == '3': cv2.imshow("result", input_img) elapsed_time = timeit.default_timer() - start_time time_plot = time_plot + elapsed_time #Show the time cost (fps) print('avefps_time_detection:', img_idx / time_detection) print('avefps_time_network:', img_idx / time_network) print('avefps_time_plot:', img_idx / time_plot) print('===============================') if pyFlag == '3': key = cv2.waitKey(30) if key == 27: break
def process_video(input_video_path, output_video_path):
    video_capture = cv.VideoCapture(input_video_path)
    video_writer = cv.VideoWriter(
        output_video_path, cv.VideoWriter_fourcc('F', 'M', 'P', '4'),
        video_capture.get(cv.CAP_PROP_FPS),
        (int(video_capture.get(3)), int(video_capture.get(4))))
    success, frame = video_capture.read()
    count = 0
    detector = MTCNN()
    net = facecnn.FACECNN()
    classifier_filename_exp = './svm_weights/params'
    with open(classifier_filename_exp, 'rb') as infile:
        model = pickle.load(infile)
    print('Loaded classifier model from file "%s"' % classifier_filename_exp)
    while success:
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        result = detector.detect_faces(frame)
        # print(result)
        frame_faces = []
        detected_faces = []
        for i in range(len(result)):
            bounding_box = result[i]['box']
            # now crop out the face
            # print(np.shape(frame))
            face = frame[bounding_box[1]:bounding_box[1] + bounding_box[3],
                         bounding_box[0]:bounding_box[0] + bounding_box[2], :]
            if face.size > 0:
                # cv.rectangle(frame, (bounding_box[0], bounding_box[1]),
                #              (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
                #              (255, 0, 255), 2)
                # face = cv.cvtColor(face, cv.COLOR_RGB2BGR)
                face = cv.resize(face, (160, 160))
                frame_faces.append(face)
                detected_faces.append(result[i]['box'])
                # face_embedding = net.get_embeddings()
                # cv.imwrite(os.path.join(output_dir, 'frame' + str(count) + '_face' + str(i) + '.bmp'), face)
        # print(np.shape(frame_faces)[0])
        embeddings = net.get_embeddings(frame_faces)
        predictions = model.predict_proba(embeddings)
        best_class_indices = np.argmax(predictions, axis=1)
        # print(np.shape(detected_faces), np.shape(best_class_indices))
        for box, prob in zip(detected_faces, best_class_indices):
            if prob:
                cv.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]),
                             (0, 255, 0), 2)
            else:
                cv.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]),
                             (255, 0, 0), 2)
        frame = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
        # do something with the result
        cv.imshow('Processed Frame', frame)
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
        video_writer.write(frame)
        print('\rFrame {}/{}'.format(count, int(video_capture.get(cv.CAP_PROP_FRAME_COUNT))),
              end='')
        success, frame = video_capture.read()
        count += 1
    video_capture.release()
    video_writer.release()
    print('\nDone')
from matplotlib.patches import Circle

# filter out all tensorflow warnings and info
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


# draw each face separately
def draw_faces(filename, result_list):
    # load the image
    data = pyplot.imread(filename)
    # plot each face as a subplot
    for i in range(len(result_list)):
        # get coordinates
        x1, y1, width, height = result_list[i]['box']
        x2, y2 = x1 + width, y1 + height
        # define subplot
        pyplot.subplot(1, len(result_list), i + 1)
        pyplot.axis('off')
        # plot face
        pyplot.imshow(data[y1:y2, x1:x2])
    # show the plot
    pyplot.show()


# load image from file
filename = 'test2.jpg'
pixels = pyplot.imread(filename)
# create the detector, using default weights
detector = MTCNN()
# detect faces in the image
faces = detector.detect_faces(pixels)
# display faces on the original image
draw_faces(filename, faces)
class FaceDetector: # local variables that don't change while program is running # the most important are refresh_rate (how often searching on full screen towards faces will be performed) # and radius (radius of searching for a local search) min_YCrCb = np.array([0, 133, 77], np.uint8) max_YCrCb = np.array([255, 173, 127], np.uint8) kernel = np.ones((5, 5), np.uint8) refresh_rate = 60 radius = 40 small_radius = 40 detector = MTCNN() tracker = dlib.correlation_tracker() def __init__(self): # local variables useful for continuous detection for one instance of the detector # most important are counter (at which frame we are at) # and Locations[] (list of approximated locations where faces are on this frame) self.counter = 0 self.Locations = [] self.trackers = [] # method for getting mask of faces, using skin extraction based on our Locations[] and skin color values in YCrCb # color space def get_mask(self, frame): new_frame = np.zeros((1080, 1920, 3), np.uint8) for i in range(len(self.Locations)): if 2 * self.Locations[i][0] > 10 and 2 * self.Locations[i][2] < 1070 and 2 * self.Locations[i][1] < 1910 \ and 2 * self.Locations[i][3] > 10: face_cropped = frame[(2 * self.Locations[i][0] - 10):(2 * self.Locations[i][2] + 10), (2 * self.Locations[i][3] - 10):(2 * self.Locations[i][1] + 10)] imageYCrCb = cv2.cvtColor(face_cropped, cv2.COLOR_BGR2YCR_CB) imageYCrCb = cv2.erode(imageYCrCb, self.kernel, iterations=6) imageYCrCb = cv2.dilate(imageYCrCb, self.kernel, iterations=6) skinRegionYCrCb = cv2.inRange(imageYCrCb, self.min_YCrCb, self.max_YCrCb) mask = np.zeros_like(face_cropped) ellipse_points = self.extract_ellipse_points(i) ellipse = cv2.fitEllipse(ellipse_points) mask = cv2.ellipse(mask, ellipse, color=(255, 255, 255), thickness=-1) face_cropped = cv2.bitwise_and(face_cropped, mask, mask=skinRegionYCrCb) new_frame[(2 * self.Locations[i][0] - 10):(2 * self.Locations[i][2] + 10), (2 * self.Locations[i][3] - 10):(2 * self.Locations[i][1] + 10)] = face_cropped return new_frame # method for performing whole frame search for faces using MTCNN face detector # Locations[] are filled with the coordinates of the faces def full_search(self, frame): detected_faces = self.detector.detect_faces(frame) self.Locations.clear() for result in detected_faces: x, y, width, height = result['box'] left = x right = x + width top = y bottom = y + height self.Locations.append([top, right, bottom, left]) def new_search(self, frame, head_list): self.Locations.clear() for element in head_list: left_margin = int(element[0] // 2 - self.small_radius) right_margin = int(element[0] // 2 + self.small_radius) top_margin = int(element[1] // 2 - self.small_radius) bot_margin = int(element[1] // 2 + self.small_radius) if left_margin < 0: left_margin = 0 if right_margin >= 960: right_margin = 956 if top_margin < 0: top_margin = 0 if bot_margin >= 540: bot_margin = 539 cropped = frame[top_margin:bot_margin, left_margin:right_margin] detected_faces = self.detector.detect_faces(cropped) for result in detected_faces: x, y, width, height = result['box'] left = x right = x + width top = y bottom = y + height self.Locations.append([ top_margin + top, left_margin + right, top_margin + bottom, left_margin + left ]) return frame # method for starting multiple dlib trackers for objects located in Locations[] def start_trackers(self, frame): new_trackers = [] for result in self.Locations: maxArea = 0 x = 0 y = 0 w = 0 h = 0 if (result[1] - result[3]) * (result[2] - result[0]) > maxArea: x = int(result[3]) y = int(result[0]) w = int(result[1] - 
result[3]) h = int(result[2] - result[0]) maxArea = w * h if maxArea > 0: t = dlib.correlation_tracker() t.start_track(frame, dlib.rectangle(x, y, x + w, y + h)) new_trackers.append(t) return new_trackers # method of searching for a one face on a small area (performed when tracker has lost tracking object) based on the # last useful tracker location # Locations[] gets updated for specific index def small_search(self, frame, index): if (self.Locations[index][0] - self.radius > 0) and (self.Locations[index][3] - self.radius > 0) and ( self.Locations[index][2] + self.radius < 540) and (self.Locations[index][1] + self.radius < 960): cropped = frame[(self.Locations[index][0] - self.radius):(self.Locations[index][2] + self.radius), (self.Locations[index][3] - self.radius):(self.Locations[index][1] + self.radius)] detected_faces = self.detector.detect_faces(cropped) for result in detected_faces: x, y, width, height = result['box'] left = x right = x + width top = y bottom = y + height for j in range(len(detected_faces)): self.Locations[index][1] = (self.Locations[index][3] + right - self.radius) self.Locations[index][2] = (self.Locations[index][0] + bottom - self.radius) self.Locations[index][0] = (self.Locations[index][0] + top - self.radius) self.Locations[index][3] = (self.Locations[index][3] + left - self.radius) return len(detected_faces) # writing current tracking location into Location[] assuming face is the written area def save_location(self, x, y, w, h, index): self.Locations[index][0] = y self.Locations[index][1] = x + w self.Locations[index][2] = y + h self.Locations[index][3] = x # method for unpacking position returned by tracker.getPosition() method (dlib.Rectangle) def unpack_position(self, box): x = int(box.left()) y = int(box.top()) w = int(box.width()) h = int(box.height()) return x, y, w, h # method for data preparation by translating our Location[] system into dlib.Rectangle used by trackers def extract_box(self, index): x = int(self.Locations[index][3]) y = int(self.Locations[index][0]) w = int(self.Locations[index][1] - self.Locations[index][3]) h = int(self.Locations[index][2] - self.Locations[index][0]) return x, y, w, h def extract_ellipse_points(self, index): _, _, w, h = self.extract_box(index) w *= 2 h *= 2 points = [[10, 10], [10, h + 10], [w + 10, (h + 10) / 2], [10, (h + 10) / 2], [(w + 10) / 2, h], [(w + 10) / 2, 10], [w + 10, h + 10], [w + 10, 10]] return np.array(points, dtype=np.int32) # main method where all magic happens def face_processing(self, frame, heads): # Downsampled image used only for search algorithm small_frame = cv2.resize(frame, (960, 540), 0, 0) # every (refresh_rate) frames search based on head locations is performed # using MTCNN detector # and also trackers are being refreshed if self.counter % self.refresh_rate == 0: self.new_search(small_frame, heads) elif self.counter % self.refresh_rate == 1: self.trackers = self.start_trackers(small_frame) # in standard case scenario basic tracking is performed # trackers are being updated else: if self.counter % 3 != 2: if len(self.trackers) != 1: mark = len(self.trackers) half = int(mark / 2) j = 0 if (self.counter % 3 == 0): for i in range(0, half): trackingQuality = self.trackers[i - j].update( small_frame) tracked_position = self.trackers[i - j].get_position() t_x, t_y, t_w, t_h = self.unpack_position( tracked_position) if trackingQuality >= 4.0: self.save_location(t_x, t_y, t_w, t_h, i) # in case of losing tracked object from a tracker window searching using MTCNN detector on a small # area is 
performed using area of last known position of a tracker window else: self.trackers.pop(i) check = self.small_search(small_frame, i) j += 1 if check != 0: x, y, w, h = self.extract_box(i) t = dlib.correlation_tracker() t.start_track( small_frame, dlib.rectangle(x, y, x + w, y + h)) self.trackers.insert(i, t) else: for i in range(half, mark): trackingQuality = self.trackers[i - j].update( small_frame) tracked_position = self.trackers[i - j].get_position() t_x, t_y, t_w, t_h = self.unpack_position( tracked_position) if trackingQuality >= 4.0: self.save_location(t_x, t_y, t_w, t_h, i) # in case of losing tracked object from a tracker window searching using MTCNN detector on a small # area is performed using area of last known position of a tracker window else: self.trackers.pop(i) check = self.small_search(small_frame, i) j += 1 if check != 0: x, y, w, h = self.extract_box(i) t = dlib.correlation_tracker() t.start_track( small_frame, dlib.rectangle(x, y, x + w, y + h)) self.trackers.insert(i, t) else: for i in range(len(self.trackers)): trackingQuality = self.trackers[i].update(small_frame) tracked_position = self.trackers[i].get_position() t_x, t_y, t_w, t_h = self.unpack_position( tracked_position) if trackingQuality >= 4.0: self.save_location(t_x, t_y, t_w, t_h, i) # in case of losing tracked object from a tracker window searching using MTCNN detector on a small # area is performed using area of last known position of a tracker window else: self.trackers.pop(i) check = self.small_search(small_frame, i) if check != 0: x, y, w, h = self.extract_box(i) t = dlib.correlation_tracker() t.start_track( small_frame, dlib.rectangle(x, y, x + w, y + h)) self.trackers.insert(i, t) # next thing done is extracting the mask of the faces and hovering it into frame with converted background # mask is extracted from original frame new_frame = self.get_mask(frame) self.counter += 1 return new_frame
def __init__(self, model, graph):
    global video
    self.video = cv2.VideoCapture(0)
    self.gender_model = model
    self.graph = graph
    self.detector = MTCNN()
# fit model
model = SVC(kernel='linear', probability=True)
model.fit(emdTrainX_norm, trainy_enc)

from inception_resnet_v1 import *

facenet_model = InceptionResNetV1()
print("model built")

facenet_model.load_weights('facenet_weights.h5')
print("weights loaded")

cap = cv2.VideoCapture(0)  # webcam
detector = MTCNN()  # build the detector once instead of once per frame

while True:
    ret, img = cap.read()

    # detect faces in the image
    results = detector.detect_faces(img)
    print('results')
    # print(results)

    for i in range(len(results)):
        x, y, w, h = results[i]['box']
        if w > 130:  # discard small detected faces
            cv2.rectangle(img, (x, y), (x + w, y + h), (67, 67, 67), 1)  # draw rectangle on main image
            detected_face = img[int(y):int(y + h), int(x):int(x + w)]  # crop detected face
            detected_face = cv2.resize(detected_face, (160, 160))  # resize to 160x160
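The example is cut off right before the cropped face is fed to FaceNet. A sketch of the usual continuation inside the same if-block follows, assuming numpy is imported as np; the per-image standardisation and the use of the SVC fitted above are assumptions rather than the original script's code.

            # sketch: standardise the crop and embed it with FaceNet (assumed continuation)
            face_pixels = detected_face.astype('float32')
            mean, std = face_pixels.mean(), face_pixels.std()
            face_pixels = (face_pixels - mean) / std
            embedding = facenet_model.predict(np.expand_dims(face_pixels, axis=0))[0]
            # classify with the SVC fitted above (assumes the training embeddings
            # were preprocessed the same way)
            probs = model.predict_proba(embedding.reshape(1, -1))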
def predict_emotion():
    emotion_list = ['Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral']
    model = RN.resnet10(include_top=False,
                        pooling='avg',
                        input_tensor=None,
                        input_shape=(224, 224, 3),
                        classes=7)
    x = model.output
    x = Dense(7, activation='softmax', name='fc8_5')(x)
    model = Model(inputs=model.input, outputs=x)
    model.load_weights('/home/app/program/micro_emotion/resnet10/macro/model.h5', by_name=True)
    detector = MTCNN()
    # img_path = '/home/app/data/beiyou/basic/Image/test/train/3/test_0063.jpg'
    # img = cv2.imread(img_path)
    #
    # t = detector.detect_faces(img)
    #
    # point = t[0]['box']
    #
    # face = img[point[1]:point[1] + point[3], point[0]:point[0] + point[2]]
    # face = cv2.resize(img, (224, 224))
    # cv2.imshow('face', face)
    # face = img_to_array(face)
    # face = face.reshape((-1, 224, 224, 3))
    # out = model.predict(face)
    # print(emotion_list[out.argmax()])
    # cv2.waitKey()
    capture = cv2.VideoCapture(0)
    while True:
        ref, frame = capture.read()
        img = frame.copy()
        t = detector.detect_faces(img)
        point = t[0]['box']
        # face = img[point[1]:point[1] + point[3], point[0]:point[0] + point[2]]
        keypoint1 = np.float32([[30, 30], [70, 30], [50, 80]])
        keypoint2 = []
        keypoint2.append(t[0]['keypoints']['left_eye'])
        keypoint2.append(t[0]['keypoints']['right_eye'])
        x = np.array(t[0]['keypoints']['mouth_left'], dtype=np.float32)
        y = np.array(t[0]['keypoints']['mouth_right'], dtype=np.float32)
        center = (x + y) / 2
        keypoint2 = np.array(keypoint2, dtype=np.float32)
        keypoint2 = np.row_stack((keypoint2, center))
        matrix = cv2.getAffineTransform(keypoint2, keypoint1)
        output = cv2.warpAffine(img, matrix, (img.shape[1], img.shape[0]))
        face = output[:100, :100]
        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = face.reshape((-1, 224, 224, 3))
        start = time.clock()
        out = model.predict(face)
        end = time.clock()
        print('inference took {}s'.format(end - start))
        # print(out)
        print(emotion_list[out.argmax()])
        cv2.rectangle(frame, (point[0], point[1]),
                      (point[0] + point[2], point[1] + point[3]), (0, 255, 0), 2)
        cv2.imshow('1', frame)
        cv2.waitKey(1)
class FaceApi:
    detector = MTCNN()
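Keeping the detector as a class attribute means the MTCNN network is built once, when the class body is evaluated, and shared by every instance. A sketch of how such a class might expose it follows; the detect_all method and the BGR-to-RGB conversion are assumptions, not the original project's API.

import cv2
from mtcnn.mtcnn import MTCNN

class FaceApi:
    detector = MTCNN()  # built once, shared by all instances

    def detect_all(self, bgr_image):
        rgb = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        # every detection carries 'box', 'confidence' and 'keypoints'
        return [d['box'] for d in self.detector.detect_faces(rgb)]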
import numpy as np
import cv2
from keras.models import load_model
from image_preprocess import preprocess
from mtcnn.mtcnn import MTCNN

detector = MTCNN()
# face_cascade = cv2.CascadeClassifier('Cascades\data\haarcascade_frontalface_alt2.xml')
model = load_model('models/face_mask_vggface_vgg16.h5')

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect faces using MTCNN
    faces = detector.detect_faces(image)
    if faces != []:
        color = (255, 0, 0)
        stroke = 2
        x = faces[0]['box'][0]
        y = faces[0]['box'][1]
        w = faces[0]['box'][2]
        h = faces[0]['box'][3]
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, stroke)
                default=1.24,
                type=float,
                help='ver dist threshold')
args = ap.parse_args()

# Load embeddings and labels
data = pickle.loads(open(args.embeddings, "rb").read())
le = pickle.loads(open(args.le, "rb").read())

embeddings = np.array(data['embeddings'])
print(len(embeddings))
labels = le.fit_transform(data['names'])

# Initialize detector
detector = MTCNN()

# Initialize face embedding model
embedding_model = face_model.FaceModel(args)

# Load the classifier model
model = load_model('outputs/my_model.h5')


# Define distance function
def findCosineDistance(vector1, vector2):
    """
    Calculate the cosine distance between two vectors.
    """
    vec1 = vector1.flatten()
    vec2 = vector2.flatten()
    print('already renamed')

for extension in extensions:
    for i, file in enumerate(glob.glob('%s/*%s' % (in_dir, extension))):
        image_path_list.append(file)
image_path_list = sorted(image_path_list)
# print(image_path_list)

# create an empty dictionary for filename, coordinate info
# to be written to a json file for the replacer script on the other side
info_dict = {}

# walk the list of input images, detect faces
detector = MTCNN()

if not os.path.isfile('%s/already_cropped.json' % in_dir):
    with open('%s/already_cropped.json' % in_dir, 'w') as outfile:
        json.dump('already cropped!', outfile, indent=4)
        outfile.write("\n")

for i, image_path in enumerate(image_path_list):
    try:
        image = cv2.imread(str(image_path))
        results = detector.detect_faces(image)[0]
        x, y, w, h = results['box']
        pad = int(.3 * h)
        x -= pad
        w += 2 * pad
class FaceDetector: def __init__(self): self.facenet_model = load_model( "D:\\PYTHON_CODE\\Face_Recognition\\facenet_keras.h5") self.svm_model = pickle.load( open("D:\\PYTHON_CODE\\Face_Recognition\\SVM_classifier.sav", 'rb')) self.data = np.load( 'D:\\PYTHON_CODE\\Face_Recognition\\faces_dataset_embeddings.npz') # object to the MTCNN detector class self.detector = MTCNN() def face_mtcnn_extractor(self, frame): """Methods takes in frames from video, extracts and returns faces from them""" # Use MTCNN to detect faces in each frame of the video result = self.detector.detect_faces(frame) return result def face_localizer(self, person): """Method takes the extracted faces and returns the coordinates""" # 1. Get the coordinates of the face bounding_box = person['box'] x1, y1 = abs(bounding_box[0]), abs(bounding_box[1]) width, height = bounding_box[2], bounding_box[3] x2, y2 = x1 + width, y1 + height return x1, y1, x2, y2, width, height def face_preprocessor(self, frame, x1, y1, x2, y2, required_size=(160, 160)): """Method takes in frame, face coordinates and returns preprocessed image""" # 1. extract the face pixels face = frame[y1:y2, x1:x2] # 2. resize pixels to the model size image = Image.fromarray(face) image = image.resize(required_size) face_array = np.asarray(image) # 3. scale pixel values face_pixels = face_array.astype('float32') # 4. standardize pixel values across channels (global) mean, std = face_pixels.mean(), face_pixels.std() face_pixels = (face_pixels - mean) / std # 5. transform face into one sample samples = np.expand_dims(face_pixels, axis=0) # 6. get face embedding yhat = self.facenet_model.predict(samples) face_embedded = yhat[0] # 7. normalize input vectors in_encoder = Normalizer(norm='l2') X = in_encoder.transform(face_embedded.reshape(1, -1)) return X def face_svm_classifier(self, X): """Methods takes in preprocessed images ,classifies and returns predicted Class label and probability""" # predict yhat = self.svm_model.predict(X) label = yhat[0] yhat_prob = self.svm_model.predict_proba(X) probability = round(yhat_prob[0][label], 2) trainy = self.data['arr_1'] # predicted label decoder out_encoder = LabelEncoder() out_encoder.fit(trainy) predicted_class_label = out_encoder.inverse_transform(yhat) label = predicted_class_label[0] return label, str(probability) def face_detector(self): """Method classifies faces on live cam feed Class labels : sai_ram, donald_trump,narendra_modi, virat_koli""" # open cv for live cam feed cap = cv2.VideoCapture(0) while True: # Capture frame-by-frame __, frame = cap.read() # 1. Extract faces from frames result = self.face_mtcnn_extractor(frame) if result: for person in result: # 2. Localize the face in the frame x1, y1, x2, y2, width, height = self.face_localizer(person) # 3. Proprocess the images for prediction X = self.face_preprocessor(frame, x1, y1, x2, y2, required_size=(160, 160)) # 4. Predict class label and its probability label, probability = self.face_svm_classifier(X) print(" Person : {} , Probability : {}".format( label, probability)) # 5. Draw a frame cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 155, 255), 2) # 6. Add the detected class label to the frame cv2.putText(frame, label + probability, (x1, height), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), lineType=cv2.LINE_AA) # display the frame with label cv2.imshow('frame', frame) # break on keybord interuption with 'q' if cv2.waitKey(1) & 0xFF == ord('q'): break # When everything's done, release capture cap.release() cv2.destroyAllWindows()
                  (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
                  color, 2)
    cv2.putText(frame, str(uu), (bounding_box[0] + 2, bounding_box[1] + 14),
                cv2.FONT_HERSHEY_PLAIN, 1, (0, 155, 255), 1)

    # Gaze direction indicator
    look_x = bounding_box[0] + bounding_box[2] // 2
    look_y = bounding_box[1]
    final_point_x = look_x - round(bounding_box[2] * math.sin(x_angle * math.pi / 180))
    final_point_y = look_y - round(bounding_box[2] * math.sin(y_angle * math.pi / 180))
    cv2.line(frame, (look_x, look_y), (final_point_x, final_point_y), color, 3)
    size = round((bounding_box[2] + bounding_box[3] / 2) / 8)
    cv2.circle(frame, (final_point_x, final_point_y), size, color, 2)


detector = MTCNN()


@jit
def detect(frame):
    return detector.detect_faces(frame)


# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture("output.mp4")

file_name = "save/log.txt"
file = open(file_name, 'w')
file.close()

while True:
    file = open(file_name, 'a')
    start = timer()
def get_all_fnames(base_folder):
    all_fnames = glob(str(Path(base_folder, '**', '*')), recursive=True)
    all_imgs = []
    patterns = ['*jpg', '*jpeg', '*png']
    for pattern in patterns:
        match = re.compile(fnmatch.translate(pattern), re.IGNORECASE).match
        valid_pths = [pth for pth in all_fnames if match(pth)]
        all_imgs.extend(valid_pths)
    return all_imgs


anfas_detector = MTCNN(steps_threshold=[0.4, 0.6, 0.6], min_face_size=100)

base_folder = '../example'
result_base_folder = Path(f'{base_folder}_result')
if not os.path.exists(result_base_folder):
    os.mkdir(result_base_folder)

all_imgs = get_all_fnames(base_folder)
print(f'Images found: {len(all_imgs)}')

for num, img_path in enumerate(tqdm(all_imgs[:])):
    try:
        if num % 100 == 0:
            with open('progress.txt', 'w') as f:
                f.write(str(num))