Example #1
# (os and cv2 imports added; both are used below but were missing from the excerpt)
import os
import time

import cv2
import progressbar
from mtcnn.mtcnn import MTCNN

program_name = 'mtcnn_face_alignment'
input_filename = '1.no_motion_resize'
output_filename = '{:s}_{:s}'.format(input_filename, program_name)

video_input = './videos/{:s}.mp4'.format(input_filename)
video_output = './videos/result/{:s}.mp4'.format(output_filename)
if not os.path.exists('./videos/result/'):
    os.makedirs('./videos/result/')
if not os.path.exists('./csv/'):
    os.makedirs('./csv/')

detector = MTCNN()
vidin = cv2.VideoCapture(video_input)
ret, frame = vidin.read()
fps = vidin.get(cv2.CAP_PROP_FPS)
frames = vidin.get(cv2.CAP_PROP_FRAME_COUNT)
results = {}

print(' Video FPS rate is {}'.format(fps))
print(' {} total frames'.format(frames))
print(' Frame size : {}'.format(frame.shape))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
vidout = cv2.VideoWriter(video_output, fourcc, fps,
                         (frame.shape[1], frame.shape[0]))
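
# (continuation sketch; the excerpt stops here) a typical per-frame loop that
# stores detections in `results` and writes annotated frames; the loop body is
# an assumption, not part of the original:
frame_idx = 0
bar = progressbar.ProgressBar(max_value=int(frames))
while ret:
    # MTCNN expects RGB input; OpenCV delivers BGR
    detections = detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    results[frame_idx] = detections
    for det in detections:
        x, y, w, h = det['box']
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 155, 255), 2)
    vidout.write(frame)
    bar.update(frame_idx)
    frame_idx += 1
    ret, frame = vidin.read()

vidin.release()
vidout.release()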
Example #2
import cv2
from mtcnn.mtcnn import MTCNN

detector = MTCNN()
#cap = cv2.VideoCapture(0)

#while(True):
image = cv2.imread("ThanhABC_1.jpg")
#    ret, image = cap.read()

result = detector.detect_faces(image)

img = image

for person in result:
    bounding_box = person['box']
    keypoints = person['keypoints']
    
    # cv2.rectangle(image,
    #               (bounding_box[0], bounding_box[1]),
    #               (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
    #               (0, 155, 255),
    #               2)
    # cv2.circle(image, (keypoints['left_eye']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['right_eye']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['nose']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['mouth_left']), 2, (0, 155, 255), 2)
    # cv2.circle(image, (keypoints['mouth_right']), 2, (0, 155, 255), 2)

    img = image[bounding_box[1]:bounding_box[1]+bounding_box[3], bounding_box[0]:bounding_box[0] + bounding_box[2]]
cv2.imshow("image", img)
cv2.waitKey(0)  # without this, the window is never rendered
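
# Note: the loop above overwrites `img` each iteration, so only the last detected
# face survives. A short sketch that keeps every crop instead (the filename
# pattern is an assumption):
for i, person in enumerate(result):
    x, y, w, h = person['box']
    cv2.imwrite("face_{}.png".format(i), image[y:y + h, x:x + w])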
Example #3
import tensorflow as tf
import numpy as np
import os
import cv2
from mtcnn.mtcnn import MTCNN
from sklearn import model_selection  # `sklearn.cross_validation` was removed; model_selection is its replacement
from task4 import *
tf.enable_eager_execution()

if __name__ == '__main__':
    cap = cv2.VideoCapture(0)
    detector = MTCNN()
    model = CNN()
    model.load_weights(r"F:\python3\renlianshibie\CNNmodel")
    #DIR = r"F:\python3\renlianshibie\faceImageGray"
    #names_dict = name_dict(DIR)
    names_dict = {'0': 'huajinqing',
                  '1': 'liangchunfu',
                  '2': 'lijunyu',
                  '3': 'linjuncheng',
                  '4': 'linweixin',
                  '5': 'liujunhao',
                  '6': 'xuhaolin',
                  '7': 'zenglingqi',
                  '8': 'zhouyuanxiang',
                  '9': 'zhushichao'}

    print("按z退出摄像头")
    while True:
        ret, frame = cap.read()  # read one frame from the camera
        z = detector.detect_faces(frame)
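        # (continuation sketch; the excerpt stops here) crop, classify, and draw
        # each detection; the 64x64 grayscale input shape and the Keras-style
        # predict() on CNN() are assumptions, not confirmed by the snippet:
        for face in z:
            x, y, w, h = face['box']
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            roi = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)
            roi = cv2.resize(roi, (64, 64)).reshape(1, 64, 64, 1) / 255.0
            label = names_dict[str(int(np.argmax(model.predict(roi))))]
            cv2.putText(frame, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('z'):
            break
    cap.release()
    cv2.destroyAllWindows()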
Example #4
    def __init__(self, path, optimize, minfacesize):
        from mtcnn.mtcnn import MTCNN  # lazy loading
        self._optimize = optimize
        self._minfacesize = minfacesize
        self._detector = MTCNN(min_face_size=minfacesize)
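
    # (hypothetical companion method, not in the original excerpt) a thin
    # wrapper returning (box, confidence) pairs from the lazily-built detector:
    def detect(self, bgr_image):
        import cv2  # local import, mirroring the lazy-loading style above
        rgb = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)  # MTCNN expects RGB
        return [(f['box'], f['confidence'])
                for f in self._detector.detect_faces(rgb)]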
Example #5
from matplotlib import pyplot as plt
from facemodel import face_recognition
import cv2
from mtcnn.mtcnn import MTCNN

detector = MTCNN()

cap = cv2.VideoCapture(0)

# Default resolutions of the frame are obtained. The default resolutions are system dependent.
# We convert the resolutions from float to integer.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
# Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.
out = cv2.VideoWriter('outpy.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))

while True: 
    #Capture frame-by-frame
    __, frame = cap.read()
    # MTCNN expects RGB input, while OpenCV captures BGR
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Use MTCNN to detect faces
    result = detector.detect_faces(rgb)

    if result:
        for face in result:
            bounding_box = face['box']
            # keypoints = face['keypoints']
            x, y, w, h = bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3]
            rect_face = cv2.rectangle(frame, (x, y), (x+w, y+h), (46, 204, 113), 2)
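            # (continuation sketch; the excerpt stops here) label each box, then
            # record and display the annotated frame:
            cv2.putText(rect_face, 'face', (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (46, 204, 113), 2)
    out.write(frame)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()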
Example #6
# (imports and parser setup added so the excerpt runs standalone)
import time
import argparse
import cv2
from mtcnn.mtcnn import MTCNN

parser = argparse.ArgumentParser()
parser.add_argument(
    '--camera',
    default=0,
    type=int,
    help=
    'Select the camera using the port with the command "ls -ltrh /dev/video*".'
)

parser.add_argument('--video',
                    default=None,
                    type=str,
                    help='Select the video path')

args = parser.parse_args()

# Network initialisation
detector = MTCNN()

# Selection between camera and video, assign cap variable to it

if args.video is None:
    cap = cv2.VideoCapture(args.camera)
else:
    cap = cv2.VideoCapture(args.video)

previous = 0

while True:
    #Capture frame-by-frame
    __, frame = cap.read()

    start = time.time()
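    # (continuation sketch; the excerpt stops here) detect, draw, and overlay the
    # instantaneous FPS computed from the `previous` timestamp above:
    for face in detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)):
        x, y, w, h = face['box']
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    fps = 1.0 / (start - previous) if start > previous else 0.0
    previous = start
    cv2.putText(frame, 'FPS: {:.1f}'.format(fps), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()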
Example #7
# (imports added so the excerpt runs standalone)
import cv2
import numpy as np
from PIL import Image
from mtcnn.mtcnn import MTCNN


class VideoCamera(object):
    def __init__(self, model, graph):
        global video
        self.video = cv2.VideoCapture(0)
        self.gender_model = model
        self.graph = graph
        self.detector = MTCNN()

    def __del__(self):
        global video
        self.video.release()

    def process_img(self, face):
        image = Image.fromarray(face)
        image = image.resize((224, 224))
        face_array = np.asarray(image)
        face_array = face_array.reshape(1, 224, 224, 3)
        with self.graph.as_default():
            gen = self.gender_model.predict(face_array)
            # NOTE: assumes the model emits a hard 0/1 label; for a sigmoid
            # probability output, a 0.5 threshold would be the usual check
            if gen[0][0] == 1:
                text = "MALE"
            else:
                text = "FEMALE"
        return text

    def get_frame(self):
        global video
        # grabs webcam image
        ret, frame = self.video.read()
        # DO WHAT YOU WANT WITH TENSORFLOW / KERAS AND OPENCV
        # print(frame.shape)
        # detect faces with MTCNN; if detection fails, the except below returns
        # the plain webcam image unchanged
        try:
            results = self.detector.detect_faces(frame)
        except Exception:
            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
        if len(results) == 0:
            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
        # getting outline of face
        imshape = frame.shape
        x1, y1, width, height = results[0]['box']
        a, b, c, d = x1, y1, (x1 + width), (y1 + height)  #Box for display only
        # x1 = int(0.5*x1)
        # y1 = int(0.5*y1)
        x2, y2 = x1 + width, y1 + height
        # x2 = int(x2+0.5*(imshape[1]-x2))
        # y2 = int(y2+0.5*(imshape[0]-y2))
        # cropping to face
        face = frame[y1:y2, x1:x2]
        # about bounding box
        text = self.process_img(face)
        cv2.rectangle(frame, (a, b), (c, d), (0, 255, 0), 2)
        cv2.putText(frame,
                    text, (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.0, (0, 255, 0),
                    lineType=cv2.LINE_AA)

        ret, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()
Example #8
class FaceDetector:
    # constant locators for landmarks
    jaw_points = np.arange(0, 17)  # face contour points
    eyebrow_dx_points = np.arange(17, 22)  # right eyebrow ("dx" = destra)
    eyebrow_sx_points = np.arange(22, 27)  # left eyebrow ("sx" = sinistra)
    nose_points = np.arange(27, 36)
    nosecenter_points = np.array([30, 33])
    right_eye = np.arange(36, 42)
    left_eye = np.arange(42, 48)

    def __init__(self, config):
        self.config = config

        # if specified use mtcnn model
        self.mtcnn_model_path = config.get('mtcnn_model_path', None)
        if self.mtcnn_model_path:
            from mtcnn.mtcnn import MTCNN
            self.detector = MTCNN()
        # otherwise rely on dlib detector
        else:
            self.detector_model_path = config.get('detector_path', None)
            if self.detector_model_path:
                self.detector = dlib.cnn_face_detection_model_v1(
                    self.detector_model_path)
            else:
                self.detector = dlib.get_frontal_face_detector()
        # always instantiate predictor
        self.predictor = dlib.shape_predictor(
            config.get('shape_predictor_path'))

    def _mtcnn_detect_faces(self, img):
        face_confidence_threshold = self.config['mtcnn_confidence_threshold']
        rects = self.detector.detect_faces(img)
        faces = [
            Face(
                img.copy(),
                # output bbox coordinate of MTCNN is [x, y, width, height];
                # clamp to 0 because the library sometimes returns negative
                # box values (a known MTCNN quirk)
                Face.Rectangle(left=max(r['box'][0], 0),
                               top=max(r['box'][1], 0),
                               right=max(r['box'][0], 0) + max(r['box'][2], 0),
                               bottom=max(r['box'][1], 0) +
                               max(r['box'][3], 0))) for r in rects
            if r['confidence'] > face_confidence_threshold
        ]

        return faces

    def detect_faces(self, img):
        if self.mtcnn_model_path:
            faces = self._mtcnn_detect_faces(img)
        else:
            rects = self.detector(img, 1)
            # if using custom detector we need to get the rect attribute
            if self.detector_model_path:
                rects = [r.rect for r in rects]
            faces = [
                Face(
                    img.copy(),
                    Face.Rectangle(top=max(r.top(), 0),
                                   right=max(r.right(), 0),
                                   bottom=max(r.bottom(), 0),
                                   left=max(r.left(), 0))) for r in rects
            ]

        # continue only if we detected at least one face
        if len(faces) == 0:
            logging.debug("No face detected")
            raise FaceExtractException("No face detected.")

        for face in faces:
            face.landmarks = self.get_landmarks(face)

        return faces

    def get_landmarks(self, face: Face, recompute=False):
        # If landmarks already present, just return, unless is required to recompute them
        if face.landmarks is not None and not recompute:
            return face.landmarks
        else:
            # we need a dlib rectangle to get the landmarks
            dlib_rect = dlib.rectangle(left=face.rect.left,
                                       top=face.rect.top,
                                       right=face.rect.right,
                                       bottom=face.rect.bottom)
            shape = self.predictor(face.img, dlib_rect)
            return np.array([(p.x, p.y) for p in shape.parts()])

    @staticmethod
    def get_eyes(face: Face):
        lx_eye = face.landmarks[FaceDetector.left_eye]
        rx_eye = face.landmarks[FaceDetector.right_eye]
        return lx_eye, rx_eye

    @staticmethod
    def get_contour_points(shape):
        # shape to numpy
        points = np.array([(p.x, p.y) for p in shape.parts()])
        face_boundary = points[np.concatenate([
            FaceDetector.jaw_points, FaceDetector.eyebrow_dx_points,
            FaceDetector.eyebrow_sx_points
        ])]
        return face_boundary, shape.rect

    def extract_face(self, face: Face):
        """
        Utility method which uses directly the current detector configuration for the generic extraction operation
        :param face:
        :return:
        """
        # size is a tuple, so need to eval from string representation in config
        size = literal_eval(self.config['extract']['size'])
        border_expand = literal_eval(self.config['extract']['border_expand'])
        align = self.config['extract']['align']
        maintain_proportion = self.config['extract']['maintain_proportion']
        masked = self.config['extract']['masked']

        return self._extract_face(face,
                                  size,
                                  border_expand=border_expand,
                                  align=align,
                                  maintain_proportion=maintain_proportion,
                                  masked=masked)

    def _extract_face(self,
                      face: Face,
                      out_size=None,
                      border_expand=(0., 0.),
                      align=False,
                      maintain_proportion=False,
                      masked=False):
        face_size = face.get_face_size()
        border_expand = (int(border_expand[0] * face_size[0]),
                         int(border_expand[1] * face_size[1]))

        # if not specified otherwise, we want extracted face size to be exactly as input face size
        if not out_size:
            out_size = face_size

        face.landmarks = self.get_landmarks(face)
        if masked:
            mask = utils.get_face_mask(
                face,
                'hull',
                # NOTE: the original read the 'dilation_kernel' config key for
                # both arguments, which looks like a copy-paste slip;
                # 'erosion_size' is assumed here
                erosion_size=literal_eval(self.config['extract'].get(
                    'erosion_size', 'None')),
                dilation_kernel=literal_eval(self.config['extract'].get(
                    'dilation_kernel', 'None')),
                blur_size=int(self.config['extract']['blur_size']))
            # black all pixels outside the mask
            face.img = cv2.bitwise_and(face.img, face.img, mask=mask[:, :, 1])

        # keep proportions of original image (rect) for extracted image, otherwise resize might stretch the content
        if maintain_proportion:
            border_delta = self._get_maintain_proportion_delta(
                face_size, out_size)
            border_expand = (border_expand[0] + int(border_delta[0] // 2),
                             border_expand[1] + int(border_delta[1] // 2))

        if align:
            cut_face = utils.ffhq_align(face,
                                        output_size=out_size[0],
                                        boundary_resize_factor=border_expand)
            #cut_face, _ = utils.align_face(face, boundary_resize_factor=border_expand)
            #cut_face = utils._align_face(face, size=out_size)
        else:
            cut_face = cv2.resize(face.get_face_img(),
                                  out_size,
                                  interpolation=cv2.INTER_CUBIC)

        return cut_face

    def _get_maintain_proportion_delta(self, src_size, dest_size):
        """
        Return delta amount to maintain destination proportion given source size.
        Tuples order is (w, h)
        :param base_border:
        :param src_size:
        :param dest_size:
        :return:
        """
        dest_ratio = max(dest_size) / min(dest_size)
        delta_h = delta_w = 0
        w, h = src_size
        if w > h:
            delta_h = w * dest_ratio - h
        else:
            delta_w = h * dest_ratio - w
        return delta_w, delta_h
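
# Minimal usage sketch (the config keys mirror the ones read in __init__; the
# file paths are placeholders, not from the original):
if __name__ == '__main__':
    config = {
        'mtcnn_model_path': 'default',  # any truthy value selects the MTCNN branch
        'mtcnn_confidence_threshold': 0.9,
        'shape_predictor_path': 'shape_predictor_68_face_landmarks.dat',
    }
    img = cv2.imread('test.jpg')
    faces = FaceDetector(config).detect_faces(img)
    print('detected {} face(s)'.format(len(faces)))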
Example #9
    def get_frame(self):

        _, fr = self.video.read()
        # fr = imutils.resize( fr, width=400 )
        #
        # modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
        # configFile = "deploy.txt"
        # net = cv2.dnn.readNetFromCaffe(configFile, modelFile )
        # (h, w) = fr.shape[:2]
        # blob = cv2.dnn.blobFromImage(cv2.resize( fr, (300, 300)), 1.0,
        #                               (300, 300), (104.0, 177.0, 123.0) )
        # net.setInput( blob )
        # detections = net.forward()
        # # loop over the detections
        # for i in range( 0, detections.shape[2] ):
        #     # extract the confidence (i.e., probability) associated with the
        #     # prediction
        #     confidence = detections[0, 0, i, 2]
        #     # filter out weak detections by ensuring the `confidence` is
        #     # greater than the minimum confidence
        #     if confidence < 0.75:
        #         continue
        #     # compute the (x, y)-coordinates of the bounding box for the
        #     # object
        #     box = detections[0, 0, i, 3:7] * np.array( [w, h, w, h] )
        #     (startX, startY, endX, endY) = box.astype( "int" )
        #
        #     # draw the bounding box of the face along with the associated
        #     # probability
        #     text = "{:.2f}%".format( confidence * 100 )
        #     y = startY - 10 if startY - 10 > 10 else startY + 10
        #     cv2.rectangle( fr, (startX, startY), (endX, endY),
        #                    (0, 0, 255), 2 )
        #     cv2.putText( fr, text, (startX, y),
        #                  cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2 )
        #
        #

        pixels = np.asarray(fr)
        # NOTE: constructing MTCNN here reloads the network on every frame;
        # hoisting it into __init__ would be considerably faster
        detector = MTCNN()
        result = detector.detect_faces(pixels)
        if result:
            for person in result:
                bounding_box = person['box']

                cv2.rectangle(fr, (bounding_box[0], bounding_box[1]),
                              (bounding_box[0] + bounding_box[2],
                               bounding_box[1] + bounding_box[3]),
                              (0, 155, 255), 2)
            # NOTE: `bounding_box` here is whatever the last loop iteration left
            # behind, so only the final detected face is classified
            fc = pixels[bounding_box[1]:bounding_box[1] + bounding_box[3],
                        bounding_box[0]:bounding_box[0] + bounding_box[2]]
            print(fc)
            roi = cv2.resize(fc, (224, 224))
            print(roi)
            with session.graph.as_default():
                k.backend.set_session(session)
                pred = model.predict_emotion(roi[np.newaxis, :, :])
            cv2.putText(fr, pred, (bounding_box[0], bounding_box[1]), font, 2,
                        (0, 0, 255), 3)
        # # # # cv2.rectangle( fr, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2 )

        _, jpeg = cv2.imencode('.jpg', fr)
        return jpeg.tobytes()
Example #10
class FacenetEngine(object):
    """
    facenet engine class
    """
    # Class variables
    # The encode vector has 128 dimensions, with each value roughly in [-2, 2],
    # so the maximum possible distance is about 11.3
    __distance_threshold = 11.0
    __debug_mode = True
    __bound = 18
    __encode_features_vector_length = 128

    def __init__(self):
        """
        コンストラクト
        """
        cur_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))

        # facenet model path
        model_path = "./model/keras-facenet/model/facenet_keras.h5"
        model_path = os.path.join(cur_dir, model_path)

        # check that the model path exists
        if not osp.exists(model_path):
            raise ValueError('{} does not exist'.format(model_path))

        # set for facenet model
        self.__model_path = model_path
        self.model = load_model(self.__model_path, compile=False)

        # transfer learning model
        self.transfer_model = self.make_transfer_learning_model()
        print(self.transfer_model.input)
        print(self.transfer_model.output)

        # create new detector, using default weights from mtcnn
        self.__detector = MTCNN()

        # set classifier model
        classifier_filename = "./model/SVM_classifer.pkl"
        classifier_filename = os.path.join(cur_dir, classifier_filename)
        self.__classifier_filename = classifier_filename

        # TODO: remove these
        self.__data_set_path = "../../../dataset/train/japanese/"
        sub_dirs = glob(self.__data_set_path + '*/')
        self.__people = [os.path.dirname(sub_dir) for sub_dir in sub_dirs]

        # Get current data from DB
        self.all_anchors = db_util.get_all_encode()

    # --------------------------------------------------------------------------------
    # Public function
    # --------------------------------------------------------------------------------
    def recognize(self, image_path, image_data=None):
        """
        指定するイメージを認証する
        :param image_path:
        :return: name
        """
        errcode, name, user_id, department = 0, 'Unknown', -1, 'Unknown'

        # check arguments
        if image_data is None:
            if not osp.exists(image_path):
                raise ValueError('file not found {}'.format(image_path))

        # make encode from image_path
        if image_data is None:
            errcode, img_encode = self.make_encode(image_path)
        else:
            errcode, img_encode = self.make_encode(image_path, image_data=image_data)

        if errcode == 0:
            # get all encodes from the DB
            all_anchors = self.all_anchors

            distances = list()
            for anchor in all_anchors:
                if len(anchor['encode']) == FacenetEngine.__encode_features_vector_length:
                    distances.append(distance.euclidean(img_encode, anchor['encode']))

            if FacenetEngine.__debug_mode:
                print('img_encode type = {}'.format(type(img_encode)))
                print("length of all encode in database: {}".format(len(all_anchors)))
                print("min of distances = {}".format(min(distances)))
                print("max of distances = {}".format(max(distances)))

            if np.max(distances) < FacenetEngine.__bound:
                if min(distances) < FacenetEngine.__distance_threshold:
                    anchor_idx = distances.index(min(distances))
                    name = all_anchors[anchor_idx]['name']
                    user_id = all_anchors[anchor_idx]['id']
                    department = all_anchors[anchor_idx]['department']
                else:
                    print(distances)
                    print(FacenetEngine.__distance_threshold)

            if FacenetEngine.__debug_mode:
                print('name = {}, id = {}, department = {}'.format(name, user_id, department))

        return errcode, name, user_id, department

    '''
    Training SVM
    '''

    def extract_face(self, file_path, image_data=None, required_size=(160, 160)):
        """
        extract face for further steps
        Calling::
        faces  = extract_face(file_path)
        Args::
            _ filename: path of images file
            _ require_size: required size of training model

        Returns::
            _ face_array: Numpy array contains bounding box information
            -
        Details::
            - get_trained_data
        """
        errcode, face_array = 0, np.array([])
        if image_data is None:
            # load image from file
            image = Image.open(file_path)
        else:
            image = image_data

        # convert to RGB, if needed
        img = image.convert('RGB')
        # convert to array
        pixels = asarray(img)

        # detect faces in the image
        results = self.__detector.detect_faces(pixels)
        if len(results) < 1:
            errcode = -1
        else:
            # extract the bounding box
            x1, y1, width, height = results[0]['box']
            # resize pixels to the model size
            x1, y1 = abs(x1), abs(y1)
            x2, y2 = x1 + width, y1 + height
            # extract the face
            face = pixels[y1:y2, x1:x2]
            # TODO: when debugging, write out the extracted face for inspection:
            # cv2.imwrite("check.jpg", face)
            # resize pixels to required size of further steps
            img = Image.fromarray(face)
            img = img.resize(required_size)
            face_array = asarray(img)
        return errcode, face_array

    def extract_face_for_preprocessing(self, file_path, required_size=(160, 160)):
        """
        extract face for further steps
        Calling::
        faces  = extract_face(file_path)
        Args::
            _ filename: path of images file
            _ require_size: required size of training model

        Returns::
            _ face_array: Numpy array contains bounding box information
            -
        Details::
            - get_trained_data
        """
        errcode, face_array = 0, np.array([])
        # load image from file
        image = Image.open(file_path)
        # convert to RGB, if needed
        img = image.convert('RGB')
        # convert to array
        pixels = asarray(img)
        # detect faces in the image
        results = self.__detector.detect_faces(pixels)
        if len(results) < 1:
            errcode = -1
        else:
            # extract the bounding box
            x1, y1, width, height = results[0]['box']
            # resize pixels to the model size
            x1, y1 = abs(x1), abs(y1)
            x2, y2 = x1 + width, y1 + height
            # extract the face
            face = pixels[y1:y2, x1:x2]
            # resize pixels to required size of further steps
            img = Image.fromarray(face)
            img = img.resize(required_size)
            face_array = asarray(img)
        return errcode, face_array

    def load_data_set(self, require_size=(160, 160)):
        """
        Load face locations from data_set
        Calling::
            faces = load_faces(directory)
        Args::
            -

        Returns::
            - asarray (X): Numpy array contains bounding box information for face position
            - asarray(Y):  Numpy array contains labels

        Raises::
            -
        Details::
            - Load face locations from data_set
        """
        X, Y = list(), list()

        # enumerate folders, on per class
        for subdir in listdir(self.__data_set_path):
            faces = list()
            # path
            path = self.__data_set_path + subdir + '/'

            # skip any files that might be in the dir
            if not isdir(path):
                continue
            for name in listdir(path):
                file_path = path + name
                print(file_path)
                # extract face (extract_face returns an (errcode, face) tuple,
                # which the original appended as-is; unpack it instead)
                errcode, face = self.extract_face(file_path)
                if errcode == 0:
                    faces.append(face)

            # create labels
            labels = [subdir for _ in range(len(faces))]
            # summarize progress
            print('>loaded %d examples for class: %s' % (len(faces), subdir))
            # storing faces
            X.extend(faces)
            Y.extend(labels)

        return asarray(X), asarray(Y)

    def convert(self, faces):
        """
        Load faces dataset (160, 160, 3) to encode into embedding 128d vector
        """

        new = list()
        # Training dataset
        # Convert each face to an encoding
        for face in faces:
            embed = self.encoding(self.model, face)
            new.append(embed)
        new = np.asarray(new)
        # Checking new dataset dimension
        return new

    @staticmethod
    def encoding(model, faces):
        """
        Load facenet pretrained model and encoding using predict function of Keras
        """
        # Scale pixel values
        faces = faces.astype('float32')
        # Standardize pixel value across channels (global)
        mean, std = faces.mean(), faces.std()
        faces = (faces - mean) / std
        # Transform face into one sample
        samples = np.expand_dims(faces, axis=0)
        # Make prediction to get encoding
        Y_hat = model.predict(samples)
        # TODO: decide whether normalization is needed
        # Y_hat_norm = [((i - min(Y_hat[0])) / (max(Y_hat[0]) - min(Y_hat[0]))) for i in Y_hat[0]]

        return Y_hat[0]

    @staticmethod
    def l2_normalizer(x, axis=-1, epsilon=1e-10):
        """
        標準化
        """
        output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
        return output

    def make_faces_encoding_labels(self):
        """
        画像より128次元の特徴値に変換する
        """
        faces, labels = self.load_data_set()

        print(faces.shape)  # (num samples, 160, 160, 3)
        print(labels.shape)  # (num samples,)

        # Encoding faces
        faces_encoding = self.convert(faces)
        print(faces_encoding.shape)  # (num samples, 128)

        # Normalize
        faces_encoding = self.l2_normalizer(faces_encoding)

        return faces_encoding, labels

    def train(self):
        """
        Train SVM model on given dataset
        """
        encodes = db_util.get_all_encode()

        faces_encoding = []
        labels = []
        for encode in encodes:
            if len(encode['encode']) == FacenetEngine.__encode_features_vector_length:
                faces_encoding.append(encode['encode'])
                labels.append(encode['id'])
            else:
                print("length is not {} encode: {}".format(FacenetEngine.__encode_features_vector_length,
                                                           len(encode['encode'])))

        # Label encode targets
        encoder = LabelEncoder()
        encoder.fit(labels)
        normalized_labels = encoder.transform(labels)

        normalized_labels = np.array(normalized_labels)
        faces_encoding = np.array(faces_encoding)

        # Fit into SVM model
        model = SVC(kernel='linear', probability=True)
        model.fit(faces_encoding, normalized_labels)
        joblib.dump(model, self.__classifier_filename)
        print('Save')

    def preprocessing(self, input_folder, output_folder):
        """
        Extract face from input image and save as output image

        Args:
            - input_folder(str)  : path of the input data folder (all images in all
                                   subdirectories are processed)
            - output_folder(str) : path of the output data folder (its structure
                                   mirrors the input folder)

        Details:
            - input images size   : any
            - output images size  : 160*160*3 (RGB)
        """
        for cur, dirs, _ in os.walk(input_folder):
            for sub_dir in dirs:
                for curDir, subDirs, files in os.walk(os.path.join(input_folder, sub_dir)):
                    for file in files:
                        file_path = os.path.join(curDir, file)
                        filename, file_extension = os.path.splitext(file_path)
                        out_path = os.path.join(output_folder, sub_dir)
                        if not os.path.exists(out_path):
                            os.mkdir(out_path)
                        output_file_path = os.path.join(out_path, file)
                        if 'jpeg' in file_extension:
                            errcode, face = self.extract_face_for_preprocessing(file_path)
                            if errcode == 0:
                                try:
                                    pil_img = Image.fromarray(face)
                                    pil_img.save(output_file_path)
                                except Exception as e:
                                    print("process image {} get error {}".format(file, e))
                            else:
                                print("process image {} get error when extract face".format(file))

    def make_transfer_learning_model(self):
        """
        making transfer learning model from facenet
        input: 160,160,3
        output: 128
        """
        model = self.model
        # Freeze the layers
        for layer in model.layers[:424]:
            layer.trainable = False
        model.layers.pop()
        # Adding custom Layers
        x = model.layers[-1].output
        predictions = Dense(26, activation="softmax", kernel_regularizer=regularizers.l2(0.01))(x)

        # creating the final model (inputs/outputs keyword names per the Keras 2 API)
        model_final = Model(inputs=model.input, outputs=predictions)

        return model_final

    def transfer_learning(self, train_data_dir, validation_data_dir, epochs):
        # compile the model
        self.transfer_model.compile(loss="categorical_crossentropy", optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                                    metrics=["accuracy"])

        # Initiate the train and test generators with data Augumentation
        # Save the model according to the conditions
        checkpoint = ModelCheckpoint("facenet_transfer_weight.h5", monitor='val_accuracy', verbose=2,
                                     save_best_only=True,
                                     save_weights_only=False, mode='auto', period=1)
        early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=100, verbose=1, mode='auto')

        temp_path = os.path.join(os.getcwd(), "temp")
        train_data_path = os.path.join(temp_path, "train")
        val_data_path = os.path.join(temp_path, "val")

        # doing preprocessing when temp dir not exist
        if not os.path.exists(temp_path):
            os.mkdir(temp_path)
            if not os.path.exists(train_data_path):
                os.mkdir(train_data_path)

            if not os.path.exists(val_data_path):
                os.mkdir(val_data_path)

            self.preprocessing(train_data_dir, train_data_path)
            self.preprocessing(validation_data_dir, val_data_path)

        train_datagen = ImageDataGenerator(
            featurewise_center=True,
            featurewise_std_normalization=True,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)

        test_datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = train_datagen.flow_from_directory(
            train_data_path,
            target_size=(160, 160),
            batch_size=32,
            class_mode="categorical")

        validation_generator = test_datagen.flow_from_directory(
            val_data_path,
            target_size=(160, 160),
            class_mode="categorical")

        # Train the model
        history = self.transfer_model.fit_generator(
            train_generator,
            steps_per_epoch=2,
            epochs=epochs,
            validation_data=validation_generator,
            validation_steps=2,
            callbacks=[checkpoint, early])

        return history

    '''
    Predicting
    '''

    def make_encode(self, input_image, image_data=None):
        """
        Make embedding vector (128-dimensions) from one image
        """
        errcode, embed, face_img_receiver_mode = 0, np.array([]), True
        if image_data is None:
            errcode, face = self.extract_face(input_image)
        else:
            # TODO: (CongThanh) Consider when merge with facenet_engine.py
            # Add-in for face data receiver
            # NOTE: if image shape equals to face shape
            if face_img_receiver_mode:
                img = image_data.convert('RGB')
                # convert to array
                pixels = asarray(img)
                faces = []
                faces.append(pixels)
                embed = self.convert(faces)
            else:
                errcode, face = self.extract_face(input_image, image_data=image_data)
                if errcode == 0:
                    faces = []
                    faces.append(face)
                    embed = self.convert(faces)
        return errcode, embed

    def predict(self, input_image):
        """
        Predicting the class of input image using pretrained model on Japanese dataset
        """
        errcode, predictions = 0, None
        errcode, embed = self.make_encode(input_image)
        if errcode == 0:
            model = joblib.load(self.__classifier_filename)
            predictions = model.predict_proba(embed)

        return errcode, predictions
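
# Minimal usage sketch (assumes the model files and DB referenced in __init__
# are in place; the image path is a placeholder, not from the original):
if __name__ == '__main__':
    engine = FacenetEngine()
    errcode, name, user_id, department = engine.recognize('test.jpg')
    print(errcode, name, user_id, department)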
Example #11
import cv2
import numpy as np
from mtcnn.mtcnn import MTCNN  # missing from the excerpt but used below
from sort import *
from util import *


#vid = cv2.VideoCapture(0)
vid = cv2.VideoCapture('test.mp4')
video_frame_cnt = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_fps = int(vid.get(cv2.CAP_PROP_FPS))
record_video = True
# record_video = False
if record_video:
    out = cv2.VideoWriter('data/outvideo.avi',cv2.VideoWriter_fourcc('M','J','P','G'), video_fps, (video_width, video_height)) # for writing Video
face_detector = MTCNN()  #Initializing MTCNN detector object
face_tracker  = Sort(max_age=50)   #Initializing SORT tracker object

ret , frame = vid.read()
while ret:
    try: 
        ret , frame = vid.read()
        original_frame = frame.copy()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = face_detector.detect_faces(frame)
        min_confidence = 0.4

        box = []
        for i in range(len(result)):
            
            box_ = result[i]["box"]
Example #12
from mtcnn.mtcnn import MTCNN
import cv2
import dlib
import numpy as np
import os

detector1 = MTCNN()
detector2 = dlib.get_frontal_face_detector()
modelFile = "models/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "models/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

classifier2 = cv2.CascadeClassifier('models/haarcascade_frontalface2.xml')
images = os.listdir('faces')
# os.makedirs('faces/dlib')
# os.makedirs('faces/mtcnn')
# os.makedirs('faces/dnn')
# os.makedirs('faces/haar')

for image in images:
    img = cv2.imread(os.path.join('faces', image))
    # img = cv2.resize(img, None, fx=2, fy=2)
    height, width = img.shape[:2]
    img1 = img.copy()
    img2 = img.copy()
    img3 = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # detect faces in the image
    faces1 = detector1.detect_faces(img_rgb)
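    # (continuation sketch; the excerpt stops here) draw MTCNN's detections and
    # save the result; assumes the commented-out 'faces/mtcnn' folder above exists:
    for face in faces1:
        x, y, w, h = face['box']
        cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imwrite(os.path.join('faces', 'mtcnn', image), img1)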
    
Example #13
# Load pretrained Inception-ResNet-v1 model
# Update model and weights path according to your working environment
# (imports added so the excerpt runs standalone)
import cv2
import numpy as np
from keras.models import model_from_json
from mtcnn.mtcnn import MTCNN
from PyQt5 import QtCore, QtWidgets

model_path = "Models/Inception_ResNet_v1.json"
weights_path = "Models/facenet_keras_weights.h5"
# weights_path = "enc1_model_weights.h5"

json_file = open(model_path, 'r')
loaded_model_json = json_file.read()
json_file.close()
print(loaded_model_json)
enc_model = model_from_json(loaded_model_json)
enc_model.load_weights(weights_path)

mtcnn_detector = MTCNN()
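
# Before the Qt UI below, a hedged sketch of how the encoder and detector are
# typically combined (the 160x160 input size and per-image standardization follow
# the usual facenet_keras convention; nothing in this excerpt confirms them):
def encode_face(frame):
    dets = mtcnn_detector.detect_faces(frame)
    if not dets:
        return None
    x, y, w, h = dets[0]['box']
    face = cv2.resize(frame[y:y + h, x:x + w], (160, 160)).astype('float32')
    face = (face - face.mean()) / face.std()  # per-image standardization
    return enc_model.predict(face[np.newaxis])[0]  # 128-d embedding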


class Ui_Form():
    lastLabel = ""

    def setupUi(self, Form):
        Form.setObjectName("Nhan Dien Khuon Mat")
        Form.resize(1100, 768)
        self.videoCapture = QtWidgets.QLabel(Form)
        self.videoCapture.setGeometry(QtCore.QRect(300, 120, 640, 480))
        self.videoCapture.setFrameShape(QtWidgets.QFrame.Box)
        self.videoCapture.setFrameShadow(QtWidgets.QFrame.Raised)
        self.videoCapture.setLineWidth(6)
        self.videoCapture.setText("")
        self.videoCapture.setObjectName("videoCapture")
Example #14
# (imports added so the excerpt runs standalone; cv2_imshow is the Colab display helper)
import cv2
import numpy as np
from urllib import request
from google.colab.patches import cv2_imshow
from mtcnn.mtcnn import MTCNN

# Give the image link
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/Channing_Tatum_by_Gage_Skidmore_3.jpg/330px-Channing_Tatum_by_Gage_Skidmore_3.jpg"

# Open the link and save the image to res
res = request.urlopen(url)
# Read the res object and convert it to an array
img = np.asarray(bytearray(res.read()), dtype='uint8')
# Add the color variable
img = cv2.imdecode(img, cv2.IMREAD_COLOR)
# Show the image
cv2_imshow(img)

"""# Step 2: Face detection"""

# Initialize mtcnn detector
detector = MTCNN()

# set face extraction parameters
target_size = (224,224) # output image size
border_rel = 0 # increase or decrease zoom on image

# detect faces in the image
detections = detector.detect_faces(img)
print(detections)

x1, y1, width, height = detections[0]['box']
dw = round(width * border_rel)
dh = round(height * border_rel)
x2, y2 = x1 + width + dw, y1 + height + dh
face = img[y1:y2, x1:x2]
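
# target_size is declared above but unused in the excerpt; the extraction step it
# was presumably meant for (a sketch, not part of the original):
face = cv2.resize(face, target_size, interpolation=cv2.INTER_AREA)
cv2_imshow(face)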
Example #15
# (imports added so the excerpt runs standalone)
# NOTE: `embedding_dict` used below is loaded elsewhere in the original script
import time
import cv2
import numpy as np
import face_recognition
from mtcnn.mtcnn import MTCNN

with open('persons.txt', 'r') as f:
    persons = f.readlines()

def find_boxes(faces):
    boxes = []
    for result in faces:
        if result['confidence'] > .9:
            x, y, width, height = result['box']
            x_max = x + width
            y_max = y + height
            boxes.append((y, x_max, y_max, x))
    return boxes

cap = cv2.VideoCapture(0)

detector = MTCNN()

while True:
    ret, img = cap.read()
    if ret:
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(rgb)
        boxes = find_boxes(faces)
        t = time.time()
        embeddings = face_recognition.face_encodings(rgb, boxes, num_jitters = 1)    
        for i, embedding in enumerate(embeddings):
            matches = []
            for person in persons:
                match = face_recognition.compare_faces(embedding_dict[person.rstrip()], embedding, tolerance = .55)
                matches.append(sum(match))
            cv2.rectangle(img, (boxes[i][3], boxes[i][0]), (boxes[i][1], boxes[i][2]), (255, 0, 0), 2)
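            # (continuation sketch; the excerpt stops here) label with the person
            # who matched the most jittered comparisons, if anyone matched:
            if max(matches) > 0:
                name = persons[int(np.argmax(matches))].rstrip()
            else:
                name = 'Unknown'
            cv2.putText(img, name, (boxes[i][3], boxes[i][0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        print('encoding took {:.3f}s'.format(time.time() - t))
        cv2.imshow('frame', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()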
Example #16
# (imports added so the excerpt runs standalone)
import os
import cv2
import numpy as np
import skimage.transform
from PIL import Image
from tqdm import tqdm
from mtcnn.mtcnn import MTCNN

# define store path
store_root_dir = ".\\result"
store_image_dir = os.path.join(store_root_dir, "CACD2000")
if not os.path.exists(store_image_dir):
    os.makedirs(store_image_dir)

# define some params for mtcnn
src = np.array([[30.2946, 51.6963], [65.5318, 51.5014], [48.0252, 71.7366],
                [33.5493, 92.3655], [62.7299, 92.2041]],
               dtype=np.float32)
threshold = [0.6, 0.7, 0.9]
factor = 0.85
minSize = 20
imgSize = [120, 100]
detector = MTCNN(steps_threshold=threshold,
                 scale_factor=factor,
                 min_face_size=minSize)

#align,crop and resize
keypoint_list = ['left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right']

for filename in tqdm(os.listdir(image_root_dir)):
    dst = []
    filepath = os.path.join(image_root_dir, filename)
    storepath = os.path.join(store_image_dir, filename)
    npimage = np.array(Image.open(filepath))
    #Image.fromarray(npimage.astype(np.uint8)).show()

    dictface_list = detector.detect_faces(
        npimage
    )  #if more than one face is detected, [0] means choose the first face
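    # (continuation sketch; the excerpt stops here) the standard 5-point
    # similarity-transform alignment is assumed, not confirmed by the snippet;
    # imgSize is assumed to be ordered (h, w):
    if not dictface_list:
        continue
    face = dictface_list[0]  # choose the first face
    for kp in keypoint_list:
        dst.append(face['keypoints'][kp])
    tform = skimage.transform.SimilarityTransform()
    tform.estimate(np.array(dst, dtype=np.float32), src)
    warped = cv2.warpAffine(npimage, tform.params[0:2],
                            (imgSize[1], imgSize[0]))
    Image.fromarray(warped.astype(np.uint8)).save(storepath)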
Example #17
def main():

    weight_file = "../pre-trained/megaface_asian/ssrnet_3_3_3_64_1.0_1.0/ssrnet_3_3_3_64_1.0_1.0.h5"

    # for face detection
    # detector = dlib.get_frontal_face_detector()
    detector = MTCNN()
    try:
        os.mkdir('./img')
    except OSError:
        pass
    # load model and weights
    img_size = 64
    stage_num = [3, 3, 3]
    lambda_local = 1
    lambda_d = 1
    model = SSR_net(img_size, stage_num, lambda_local, lambda_d)()
    model.load_weights(weight_file)

    clip = VideoFileClip(sys.argv[1])  # can be gif or movie

    #python version
    pyFlag = ''
    if len(sys.argv) < 3:
        pyFlag = '2'  # default: use moviepy to show; works on python 2.7 and 3.5
    elif len(sys.argv) == 3:
        pyFlag = sys.argv[2]  #python version
    else:
        print('Wrong input!')
        sys.exit()

    img_idx = 0
    detected = ''  # initialised outside the loop so the else-branch below can reuse it
    time_detection = 0
    time_network = 0
    time_plot = 0
    ad = 0.4
    skip_frame = 5  # every 5 frame do 1 detection and network forward propagation
    for img in clip.iter_frames():
        img_idx = img_idx + 1

        input_img = img  # using python 2.7 with moviepy to show the image without channel flip

        if pyFlag == '3':
            input_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img_h, img_w, _ = np.shape(input_img)
        input_img = cv2.resize(input_img, (1024, int(1024 * img_h / img_w)))
        img_h, img_w, _ = np.shape(input_img)

        if img_idx == 1 or img_idx % skip_frame == 0:

            # detect faces using dlib detector
            start_time = timeit.default_timer()
            detected = detector.detect_faces(input_img)
            elapsed_time = timeit.default_timer() - start_time
            time_detection = time_detection + elapsed_time
            faces = np.empty((len(detected), img_size, img_size, 3))

            for i, d in enumerate(detected):
                print(i)
                print(d['confidence'])
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    x2 = x1 + w
                    y2 = y1 + h
                    xw1 = max(int(x1 - ad * w), 0)
                    yw1 = max(int(y1 - ad * h), 0)
                    xw2 = min(int(x2 + ad * w), img_w - 1)
                    yw2 = min(int(y2 + ad * h), img_h - 1)
                    cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0),
                                  2)
                    # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
                    faces[i, :, :, :] = cv2.resize(
                        input_img[yw1:yw2 + 1, xw1:xw2 + 1, :],
                        (img_size, img_size))

            start_time = timeit.default_timer()
            if len(detected) > 0:
                # predict ages and genders of the detected faces
                results = model.predict(faces)
                predicted_ages = results

            # draw results
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    label = "{}".format(int(predicted_ages[i]))
                    draw_label(input_img, (x1, y1), label)
            elapsed_time = timeit.default_timer() - start_time
            time_network = time_network + elapsed_time

            start_time = timeit.default_timer()

            if pyFlag == '2':
                img_clip = ImageClip(input_img)
                img_clip.show()
                cv2.imwrite('img/' + str(img_idx) + '.png',
                            cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR))
            elif pyFlag == '3':
                cv2.imshow("result", input_img)
                cv2.imwrite('img/' + str(img_idx) + '.png',
                            cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR))

            elapsed_time = timeit.default_timer() - start_time
            time_plot = time_plot + elapsed_time

        else:
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    x2 = x1 + w
                    y2 = y1 + h
                    xw1 = max(int(x1 - ad * w), 0)
                    yw1 = max(int(y1 - ad * h), 0)
                    xw2 = min(int(x2 + ad * w), img_w - 1)
                    yw2 = min(int(y2 + ad * h), img_h - 1)
                    cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0),
                                  2)
                    # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
                    faces[i, :, :, :] = cv2.resize(
                        input_img[yw1:yw2 + 1, xw1:xw2 + 1, :],
                        (img_size, img_size))

            # draw results
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    label = "{}".format(int(predicted_ages[i]))
                    draw_label(input_img, (x1, y1), label)

            start_time = timeit.default_timer()
            if pyFlag == '2':
                img_clip = ImageClip(input_img)
                img_clip.show()
            elif pyFlag == '3':
                cv2.imshow("result", input_img)
            elapsed_time = timeit.default_timer() - start_time
            time_plot = time_plot + elapsed_time

        #Show the time cost (fps)
        print('avefps_time_detection:', img_idx / time_detection)
        print('avefps_time_network:', img_idx / time_network)
        print('avefps_time_plot:', img_idx / time_plot)
        print('===============================')
        if pyFlag == '3':
            key = cv2.waitKey(30)
            if key == 27:
                break
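
# (assumed entry point; the excerpt defines main() but the call was cut off)
if __name__ == '__main__':
    main()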
Example #18
# (imports added so the excerpt runs standalone; facecnn is this project's module)
import pickle
import cv2 as cv
import numpy as np
from mtcnn.mtcnn import MTCNN
import facecnn


def process_video(input_video_path, output_video_path):
    video_capture = cv.VideoCapture(input_video_path)
    video_writer = cv.VideoWriter(
        output_video_path, cv.VideoWriter_fourcc('F', 'M', 'P', '4'),
        video_capture.get(cv.CAP_PROP_FPS),
        (int(video_capture.get(3)), int(video_capture.get(4))))
    success, frame = video_capture.read()
    count = 0
    detector = MTCNN()
    net = facecnn.FACECNN()
    classifier_filename_exp = './svm_weights/params'
    with open(classifier_filename_exp, 'rb') as infile:
        model = pickle.load(infile)
    print('Loaded classifier model from file "%s"' % classifier_filename_exp)
    while success:
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        result = detector.detect_faces(frame)
        # print(result)
        frame_faces = []
        detected_faces = []
        for i in range(len(result)):
            bounding_box = result[i]['box']
            # now crop out the face
            # print(np.shape(frame))
            face = frame[bounding_box[1]:bounding_box[1] + bounding_box[3],
                         bounding_box[0]:bounding_box[0] + bounding_box[2], :]
            if face.size > 0:
                # cv.rectangle(frame, (bounding_box[0], bounding_box[1]), (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]), (255, 0, 255), 2)
                # face = cv.cvtColor(face, cv.COLOR_RGB2BGR)
                face = cv.resize(face, (160, 160))
                frame_faces.append(face)
                detected_faces.append(result[i]['box'])
                # face_embedding = net.get_embeddings()
                # cv.imwrite(os.path.join(output_dir, 'frame' + str(count) + '_face' + str(i) + '.bmp'), face)

        # print(np.shape(frame_faces)[0])
        embeddings = net.get_embeddings(frame_faces)
        predictions = model.predict_proba(embeddings)
        best_class_indices = np.argmax(predictions, axis=1)
        # print(np.shape(detected_faces), np.shape(best_class_indices))
        for box, class_idx in zip(detected_faces, best_class_indices):
            # one rectangle color per predicted class
            color = (0, 255, 0) if class_idx else (255, 0, 0)
            cv.rectangle(frame, (box[0], box[1]),
                         (box[0] + box[2], box[1] + box[3]), color, 2)
        frame = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
        #do something with the result
        cv.imshow('Processed Frame', frame)
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
        video_writer.write(frame)
        print('\rFrame {}/{}'.format(
            count, int(video_capture.get(cv.CAP_PROP_FRAME_COUNT))),
              end='')
        success, frame = video_capture.read()
        count += 1
    video_capture.release()
    video_writer.release()
    print('\nDone')
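
# Minimal usage sketch (paths are placeholders, not from the original):
if __name__ == '__main__':
    process_video('input.mp4', 'output_labeled.mp4')
    cv.destroyAllWindows()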
Example #19
# (imports added so the excerpt runs standalone)
import os
import cv2
import dlib
import numpy as np
from matplotlib import pyplot
from matplotlib.patches import Circle

# filter out all tensorflow warnings and info
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from mtcnn.mtcnn import MTCNN

# draw each face separately
def draw_faces(filename, result_list):
    # load the image
    data = pyplot.imread(filename)
    # plot each face as a subplot
    for i in range(len(result_list)):
        # get coordinates
        x1, y1, width, height = result_list[i]['box']
        x2, y2 = x1 + width, y1 + height
        # define subplot
        pyplot.subplot(1, len(result_list), i + 1)
        pyplot.axis('off')
        # plot face
        pyplot.imshow(data[y1:y2, x1:x2])
    # show the plot
    pyplot.show()

# load image from file
filename = 'test2.jpg'
pixels = pyplot.imread(filename)
# create the detector, using default weights
detector = MTCNN()
# detect faces in the image
faces = detector.detect_faces(pixels)
# display faces on the original image
draw_faces(filename, faces)

class FaceDetector:
    # class-level constants that stay fixed while the program runs;
    # the most important are refresh_rate (how often a full-frame face search is
    # performed) and radius (search radius for a local re-detection)
    min_YCrCb = np.array([0, 133, 77], np.uint8)
    max_YCrCb = np.array([255, 173, 127], np.uint8)
    kernel = np.ones((5, 5), np.uint8)
    refresh_rate = 60
    radius = 40
    small_radius = 40
    detector = MTCNN()
    tracker = dlib.correlation_tracker()

    def __init__(self):
        # local variables useful for continuous detection for one instance of the detector
        # most important are counter (at which frame we are at)
        # and Locations[] (list of approximated locations where faces are on this frame)
        self.counter = 0
        self.Locations = []
        self.trackers = []

    # method for getting mask of faces, using skin extraction based on our Locations[] and skin color values in YCrCb
    # color space
    def get_mask(self, frame):
        new_frame = np.zeros((1080, 1920, 3), np.uint8)
        for i in range(len(self.Locations)):
            if 2 * self.Locations[i][0] > 10 and 2 * self.Locations[i][2] < 1070 and 2 * self.Locations[i][1] < 1910 \
                    and 2 * self.Locations[i][3] > 10:
                face_cropped = frame[(2 * self.Locations[i][0] -
                                      10):(2 * self.Locations[i][2] + 10),
                                     (2 * self.Locations[i][3] -
                                      10):(2 * self.Locations[i][1] + 10)]
                imageYCrCb = cv2.cvtColor(face_cropped, cv2.COLOR_BGR2YCR_CB)
                imageYCrCb = cv2.erode(imageYCrCb, self.kernel, iterations=6)
                imageYCrCb = cv2.dilate(imageYCrCb, self.kernel, iterations=6)
                skinRegionYCrCb = cv2.inRange(imageYCrCb, self.min_YCrCb,
                                              self.max_YCrCb)
                mask = np.zeros_like(face_cropped)
                ellipse_points = self.extract_ellipse_points(i)
                ellipse = cv2.fitEllipse(ellipse_points)
                mask = cv2.ellipse(mask,
                                   ellipse,
                                   color=(255, 255, 255),
                                   thickness=-1)
                face_cropped = cv2.bitwise_and(face_cropped,
                                               mask,
                                               mask=skinRegionYCrCb)
                new_frame[(2 * self.Locations[i][0] -
                           10):(2 * self.Locations[i][2] + 10),
                          (2 * self.Locations[i][3] -
                           10):(2 * self.Locations[i][1] + 10)] = face_cropped
        return new_frame

    # method for performing whole frame search for faces using MTCNN face detector
    # Locations[] are filled with the coordinates of the faces
    def full_search(self, frame):
        detected_faces = self.detector.detect_faces(frame)
        self.Locations.clear()
        for result in detected_faces:
            x, y, width, height = result['box']
            left = x
            right = x + width
            top = y
            bottom = y + height
            self.Locations.append([top, right, bottom, left])

    def new_search(self, frame, head_list):
        self.Locations.clear()
        for element in head_list:
            left_margin = int(element[0] // 2 - self.small_radius)
            right_margin = int(element[0] // 2 + self.small_radius)
            top_margin = int(element[1] // 2 - self.small_radius)
            bot_margin = int(element[1] // 2 + self.small_radius)
            if left_margin < 0:
                left_margin = 0
            if right_margin >= 960:
                right_margin = 956
            if top_margin < 0:
                top_margin = 0
            if bot_margin >= 540:
                bot_margin = 539
            cropped = frame[top_margin:bot_margin, left_margin:right_margin]
            detected_faces = self.detector.detect_faces(cropped)
            for result in detected_faces:
                x, y, width, height = result['box']
                left = x
                right = x + width
                top = y
                bottom = y + height
                self.Locations.append([
                    top_margin + top, left_margin + right, top_margin + bottom,
                    left_margin + left
                ])
        return frame

    # method for starting multiple dlib trackers for objects located in Locations[]
    def start_trackers(self, frame):
        new_trackers = []
        for result in self.Locations:
            maxArea = 0
            x = 0
            y = 0
            w = 0
            h = 0
            if (result[1] - result[3]) * (result[2] - result[0]) > maxArea:
                x = int(result[3])
                y = int(result[0])
                w = int(result[1] - result[3])
                h = int(result[2] - result[0])
                maxArea = w * h
            if maxArea > 0:
                t = dlib.correlation_tracker()
                t.start_track(frame, dlib.rectangle(x, y, x + w, y + h))
                new_trackers.append(t)
        return new_trackers

    # method for searching for a single face in a small area (performed when a
    # tracker has lost its object), based on the last useful tracker location
    # Locations[] gets updated for the specific index; returns the number of
    # faces found, or 0 when the search window would leave the frame
    def small_search(self, frame, index):
        # bounds of the 960x540 downsampled frame
        if (self.Locations[index][0] - self.radius > 0) and \
                (self.Locations[index][3] - self.radius > 0) and \
                (self.Locations[index][2] + self.radius < 540) and \
                (self.Locations[index][1] + self.radius < 960):
            cropped = frame[(self.Locations[index][0] - self.radius):
                            (self.Locations[index][2] + self.radius),
                            (self.Locations[index][3] - self.radius):
                            (self.Locations[index][1] + self.radius)]
            detected_faces = self.detector.detect_faces(cropped)
            for result in detected_faces:
                x, y, width, height = result['box']
                # translate the crop-relative box back into frame coordinates;
                # [1] and [2] are written first, while [0] and [3] still hold
                # the crop origin
                self.Locations[index][1] = self.Locations[index][3] + x + width - self.radius
                self.Locations[index][2] = self.Locations[index][0] + y + height - self.radius
                self.Locations[index][0] = self.Locations[index][0] + y - self.radius
                self.Locations[index][3] = self.Locations[index][3] + x - self.radius
            return len(detected_faces)
        return 0

    # writing current tracking location into Location[] assuming face is the written area
    def save_location(self, x, y, w, h, index):
        self.Locations[index][0] = y
        self.Locations[index][1] = x + w
        self.Locations[index][2] = y + h
        self.Locations[index][3] = x

    # method for unpacking position returned by tracker.getPosition() method (dlib.Rectangle)
    def unpack_position(self, box):
        x = int(box.left())
        y = int(box.top())
        w = int(box.width())
        h = int(box.height())
        return x, y, w, h

    # method for data preparation by translating our Location[] system into dlib.Rectangle used by trackers
    def extract_box(self, index):
        x = int(self.Locations[index][3])
        y = int(self.Locations[index][0])
        w = int(self.Locations[index][1] - self.Locations[index][3])
        h = int(self.Locations[index][2] - self.Locations[index][0])
        return x, y, w, h

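    # method for generating eight anchor points (corners and edge midpoints) of
    # a padded box around the doubled face region, presumably used to fit or
    # draw an elliptical mask over the face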
    def extract_ellipse_points(self, index):
        _, _, w, h = self.extract_box(index)
        w *= 2
        h *= 2
        points = [[10, 10], [10, h + 10], [w + 10, (h + 10) / 2],
                  [10, (h + 10) / 2], [(w + 10) / 2, h], [(w + 10) / 2, 10],
                  [w + 10, h + 10], [w + 10, 10]]
        return np.array(points, dtype=np.int32)

    # helper for updating the dlib trackers with indices in [start, end);
    # a tracker that has lost its object is re-detected via small_search(),
    # and the offset j keeps list indices aligned after a tracker is dropped
    def update_trackers(self, small_frame, start, end):
        j = 0
        for i in range(start, end):
            trackingQuality = self.trackers[i - j].update(small_frame)
            tracked_position = self.trackers[i - j].get_position()
            t_x, t_y, t_w, t_h = self.unpack_position(tracked_position)
            if trackingQuality >= 4.0:
                self.save_location(t_x, t_y, t_w, t_h, i)

            # in case the tracked object is lost, an MTCNN search is performed
            # on a small area around the last known tracker window position
            else:
                self.trackers.pop(i - j)
                check = self.small_search(small_frame, i)
                if check:
                    x, y, w, h = self.extract_box(i)
                    t = dlib.correlation_tracker()
                    t.start_track(small_frame,
                                  dlib.rectangle(x, y, x + w, y + h))
                    self.trackers.insert(i - j, t)
                else:
                    j += 1

    # main method where all the magic happens
    def face_processing(self, frame, heads):
        # downsampled image used only for the search algorithm
        small_frame = cv2.resize(frame, (960, 540), 0, 0)

        # every (refresh_rate) frames a search based on head locations is
        # performed with the MTCNN detector and the trackers are refreshed
        if self.counter % self.refresh_rate == 0:
            self.new_search(small_frame, heads)
        elif self.counter % self.refresh_rate == 1:
            self.trackers = self.start_trackers(small_frame)

        # in the standard case basic tracking is performed: trackers are
        # updated on two out of every three frames, alternating between the
        # first and the second half of the list to spread the detection cost
        else:
            if self.counter % 3 != 2:
                mark = len(self.trackers)
                if mark == 1:
                    self.update_trackers(small_frame, 0, 1)
                elif self.counter % 3 == 0:
                    self.update_trackers(small_frame, 0, mark // 2)
                else:
                    self.update_trackers(small_frame, mark // 2, mark)

        # finally the mask of the faces is extracted from the original frame
        # and overlaid onto the frame with the converted background
        new_frame = self.get_mask(frame)
        self.counter += 1
        return new_frame
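For reference, a minimal sketch of how the class above might be driven frame by frame; FaceTracker is a placeholder name (the class statement is not shown), and the head coordinates would come from an external head detector in full-resolution pixels:

# hedged usage sketch; FaceTracker is a placeholder name for the class above
import cv2

tracker = FaceTracker()
cap = cv2.VideoCapture('input.mp4')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    heads = [[480, 270]]  # illustrative [x, y] head location in full-res pixels
    masked = tracker.face_processing(frame, heads)
    cv2.imshow('masked', masked)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()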
Example #21
    def __init__(self, model, graph):
        self.video = cv2.VideoCapture(0)
        self.gender_model = model
        self.graph = graph
        self.detector = MTCNN()
# fit model
model = SVC(kernel='linear', probability=True)
model.fit(emdTrainX_norm, trainy_enc)

from inception_resnet_v1 import *
facenet_model = InceptionResNetV1()
print("model built")

facenet_model.load_weights('facenet_weights.h5')
print("weights loaded")

cap = cv2.VideoCapture(0)  # webcam
detector = MTCNN()  # initialize the detector once, outside the capture loop

while True:
    ret, img = cap.read()
    # detect faces in the image
    results = detector.detect_faces(img)
    #print(results)
    for i in range(len(results)):
        x, y, w, h = results[i]['box']

        if w > 130:  #discard small detected faces
            cv2.rectangle(img, (x, y), (x + w, y + h), (67, 67, 67),
                          1)  #draw rectangle to main image

            detected_face = img[int(y):int(y + h),
                                int(x):int(x + w)]  #crop detected face
            detected_face = cv2.resize(detected_face,
                                       (160, 160))  #resize to 160x160 for FaceNet
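The listing cuts off before the detected face is actually recognized; a hedged sketch of the step it builds toward, assuming the SVC fitted above on l2-normalized embeddings and FaceNet's standardized 160x160 input, continuing inside the if w > 130: block:

            # --- hedged sketch of the missing recognition step ---
            from sklearn.preprocessing import Normalizer
            pixels = detected_face.astype('float32')
            pixels = (pixels - pixels.mean()) / pixels.std()  # standardize
            embedding = facenet_model.predict(pixels.reshape(1, 160, 160, 3))
            embedding = Normalizer(norm='l2').transform(embedding)
            pred = model.predict(embedding)[0]           # encoded class label
            prob = model.predict_proba(embedding).max()
            cv2.putText(img, '{} ({:.2f})'.format(pred, prob), (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)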
Example #23
def predict_emotion():
    emotion_list = [
        'Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger',
        'Neutral'
    ]
    model = RN.resnet10(include_top=False,
                        pooling='avg',
                        input_tensor=None,
                        input_shape=(224, 224, 3),
                        classes=7)
    x = model.output
    x = Dense(7, activation='softmax', name='fc8_5')(x)
    model = Model(inputs=model.input, outputs=x)
    model.load_weights(
        '/home/app/program/micro_emotion/resnet10/macro/model.h5',
        by_name=True)
    detector = MTCNN()
    # img_path = '/home/app/data/beiyou/basic/Image/test/train/3/test_0063.jpg'
    # img = cv2.imread(img_path)
    # # t = detector.detect_faces(img)
    # # point = t[0]['box']
    # # face = img[point[1]:point[1] + point[3], point[0]:point[0] + point[2]]
    # face = cv2.resize(img, (224, 224))
    # cv2.imshow('face',face)
    # face = img_to_array(face)
    # face = face.reshape((-1, 224, 224, 3))
    # out = model.predict(face)
    # print(emotion_list[out.argmax()])
    # cv2.waitKey()
    capture = cv2.VideoCapture(0)
    while True:
        ret, frame = capture.read()
        img = frame.copy()
        t = detector.detect_faces(img)
        if not t:  # skip frames with no detected face
            continue

        point = t[0]['box']
        #face = img[point[1]:point[1] + point[3], point[0]:point[0] + point[2]]
        keypoint1 = np.float32([[30, 30], [70, 30], [50, 80]])
        keypoint2 = []
        keypoint2.append(t[0]['keypoints']['left_eye'])
        keypoint2.append(t[0]['keypoints']['right_eye'])
        x = np.array(t[0]['keypoints']['mouth_left'], dtype=np.float32)
        y = np.array(t[0]['keypoints']['mouth_right'], dtype=np.float32)
        center = (x + y) / 2
        keypoint2 = np.array(keypoint2, dtype=np.float32)
        keypoint2 = np.row_stack((keypoint2, center))

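        # estimate the affine transform that maps the detected eye centers and
        # mouth midpoint onto fixed canonical positions, then warp the frame so
        # the face is roughly aligned before cropping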
        matrix = cv2.getAffineTransform(keypoint2, keypoint1)
        output = cv2.warpAffine(img, matrix, (img.shape[1], img.shape[0]))
        face = output[:100, :100]

        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = face.reshape((-1, 224, 224, 3))
        start = time.perf_counter()  # time the forward pass
        out = model.predict(face)
        end = time.perf_counter()
        print('inference took {}s'.format(end - start))
        #print(out)
        print(emotion_list[out.argmax()])
        cv2.rectangle(frame, (point[0], point[1]),
                      (point[0] + point[2], point[1] + point[3]), (0, 255, 0),
                      2)
        cv2.imshow('1', frame)
        cv2.waitKey(1)
Example #24
class FaceApi:
    detector = MTCNN()
import numpy as np
import cv2
from keras.models import load_model
from image_preprocess import preprocess

from mtcnn.mtcnn import MTCNN

detector = MTCNN()

# face_cascade = cv2.CascadeClassifier('Cascades\data\haarcascade_frontalface_alt2.xml')
model = load_model('models/face_mask_vggface_vgg16.h5')
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # only used by the commented-out Haar cascade path
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # detect faces using MTCNN (the detector expects RGB input)
    faces = detector.detect_faces(image)

    if faces:
        color = (255, 0, 0)
        stroke = 2
        x, y, w, h = faces[0]['box']

        cv2.rectangle(frame, (x, y), (x + w, y + h), color, stroke)
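The snippet stops before the mask model is applied; a hedged sketch of that step, assuming preprocess (imported above) turns a face crop into a model-ready batch and that the network emits a single mask probability:

        # hedged sketch: classify the detected face crop
        face_crop = frame[y:y + h, x:x + w]
        batch = preprocess(face_crop)  # assumed to return a model-ready batch
        mask_prob = float(model.predict(batch)[0][0])  # assumed sigmoid output
        label = 'mask' if mask_prob > 0.5 else 'no mask'
        cv2.putText(frame, label, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, stroke)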
Example #26
                default=1.24,
                type=float,
                help='ver dist threshold')

args = ap.parse_args()

# Load embeddings and labels
data = pickle.loads(open(args.embeddings, "rb").read())
le = pickle.loads(open(args.le, "rb").read())

embeddings = np.array(data['embeddings'])
print(len(embeddings))
labels = le.fit_transform(data['names'])

# Initialize detector
detector = MTCNN()

# Initialize faces embedding model
embedding_model = face_model.FaceModel(args)

# Load the classifier model
model = load_model('outputs/my_model.h5')


# Define distance function
def findCosineDistance(vector1, vector2):
    """
    Calculate the cosine distance between two vectors
    """
    vec1 = vector1.flatten()
    vec2 = vector2.flatten()
    # cosine distance = 1 - cosine similarity
    return 1 - np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
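Downstream, this distance is typically compared against the verification threshold defined in the parser above; a hedged sketch (the threshold argument name, the get_feature call, and aligned_face are assumptions, since those parts of the script are not shown):

# hedged matching sketch against the stored embeddings
# aligned_face: placeholder for a preprocessed face crop
query = embedding_model.get_feature(aligned_face)  # assumed FaceModel API
distances = [findCosineDistance(query, emb) for emb in embeddings]
best = int(np.argmin(distances))
if distances[best] < args.threshold:  # assumed dest of the 1.24 argument above
    name = le.inverse_transform([labels[best]])[0]
else:
    name = 'unknown'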
Example #27
            print('already renamed')

for extension in extensions:
    for i, file in enumerate(glob.glob('%s/*%s' % (in_dir, extension))):
        image_path_list.append(file)

image_path_list = sorted(image_path_list)

# print (image_path_list)

# create an empty dictionary for filename, coordinate info
# to be written to a json file for the replacer script on the other side
info_dict = {}

# walk the list of input images and detect faces
detector = MTCNN()

if not os.path.isfile('%s/already_cropped.json' % in_dir):
    with open('%s/already_cropped.json' % in_dir, 'w') as outfile:
        json.dump('already cropped!', outfile, indent=4)
        outfile.write("\n")

    for i, image_path in enumerate(image_path_list):
        try:
            image = cv2.imread(str(image_path))
            results = detector.detect_faces(image)[0]
            x, y, w, h = results['box']

            pad = int(.3 * h)
            x -= pad
            w += 2 * pad
class FaceDetector:
    def __init__(self):
        self.facenet_model = load_model(
            "D:\\PYTHON_CODE\\Face_Recognition\\facenet_keras.h5")
        self.svm_model = pickle.load(
            open("D:\\PYTHON_CODE\\Face_Recognition\\SVM_classifier.sav",
                 'rb'))
        self.data = np.load(
            'D:\\PYTHON_CODE\\Face_Recognition\\faces_dataset_embeddings.npz')
        # object to the MTCNN detector class
        self.detector = MTCNN()

    def face_mtcnn_extractor(self, frame):
        """Methods takes in frames from video, extracts and returns faces from them"""
        # Use MTCNN to detect faces in each frame of the video
        result = self.detector.detect_faces(frame)
        return result

    def face_localizer(self, person):
        """Method takes the extracted faces and returns the coordinates"""
        # 1. Get the coordinates of the face
        bounding_box = person['box']
        x1, y1 = abs(bounding_box[0]), abs(bounding_box[1])
        width, height = bounding_box[2], bounding_box[3]
        x2, y2 = x1 + width, y1 + height
        return x1, y1, x2, y2, width, height

    def face_preprocessor(self,
                          frame,
                          x1,
                          y1,
                          x2,
                          y2,
                          required_size=(160, 160)):
        """Method takes in frame, face coordinates and returns preprocessed image"""
        # 1. extract the face pixels
        face = frame[y1:y2, x1:x2]
        # 2. resize pixels to the model size
        image = Image.fromarray(face)
        image = image.resize(required_size)
        face_array = np.asarray(image)
        # 3. scale pixel values
        face_pixels = face_array.astype('float32')
        # 4. standardize pixel values across channels (global)
        mean, std = face_pixels.mean(), face_pixels.std()
        face_pixels = (face_pixels - mean) / std
        # 5. transform face into one sample
        samples = np.expand_dims(face_pixels, axis=0)
        # 6. get face embedding
        yhat = self.facenet_model.predict(samples)
        face_embedded = yhat[0]
        # 7. normalize input vectors
        in_encoder = Normalizer(norm='l2')
        X = in_encoder.transform(face_embedded.reshape(1, -1))
        return X

    def face_svm_classifier(self, X):
        """Methods takes in preprocessed images ,classifies and returns predicted Class label and probability"""
        # predict
        yhat = self.svm_model.predict(X)
        label = yhat[0]
        yhat_prob = self.svm_model.predict_proba(X)
        probability = round(yhat_prob[0][label], 2)
        trainy = self.data['arr_1']
        # predicted label decoder
        out_encoder = LabelEncoder()
        out_encoder.fit(trainy)
        predicted_class_label = out_encoder.inverse_transform(yhat)
        label = predicted_class_label[0]
        return label, str(probability)

    def face_detector(self):
        """Method classifies faces on a live cam feed
           Class labels : sai_ram, donald_trump, narendra_modi, virat_koli"""
        # open cv for live cam feed
        cap = cv2.VideoCapture(0)
        while True:
            # Capture frame-by-frame
            __, frame = cap.read()
            # 1. Extract faces from frames
            result = self.face_mtcnn_extractor(frame)
            if result:
                for person in result:
                    # 2. Localize the face in the frame
                    x1, y1, x2, y2, width, height = self.face_localizer(person)
                    # 3. Preprocess the face and compute its embedding
                    X = self.face_preprocessor(frame,
                                               x1,
                                               y1,
                                               x2,
                                               y2,
                                               required_size=(160, 160))
                    # 4. Predict class label and its probability
                    label, probability = self.face_svm_classifier(X)
                    print(" Person : {} , Probability : {}".format(
                        label, probability))
                    # 5. Draw a frame
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 155, 255), 2)
                    # 6. Add the detected class label just above the face box
                    cv2.putText(frame,
                                label + ' ' + probability, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                1.0, (255, 255, 255),
                                lineType=cv2.LINE_AA)
            # display the frame with label
            cv2.imshow('frame', frame)
            # break on keyboard interruption with 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # When everything's done, release capture
        cap.release()
        cv2.destroyAllWindows()
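A minimal way to run the class, assuming the model and dataset paths in __init__ exist on disk:

if __name__ == '__main__':
    # start the live-feed recognition loop; press 'q' to quit
    FaceDetector().face_detector()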
Example #29
        (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
        color, 2)

    cv2.putText(frame, str(uu), (bounding_box[0] + 2, bounding_box[1] + 14), cv2.FONT_HERSHEY_PLAIN,
        1, (0, 155, 255), 1)
        
    # Gaze direction indicator: project the head-pose angles onto the image
    look_x = bounding_box[0] + bounding_box[2] // 2
    look_y = bounding_box[1]
    final_point_x = look_x - round(bounding_box[2] * math.sin(x_angle * math.pi / 180))
    final_point_y = look_y - round(bounding_box[2] * math.sin(y_angle * math.pi / 180))
    cv2.line(frame, (look_x, look_y), (final_point_x, final_point_y), color, 3)
    size = round((bounding_box[2] + bounding_box[3]) / 2 / 8)  # circle radius ~ face size / 8
    cv2.circle(frame, (final_point_x, final_point_y), size, color, 2)

detector = MTCNN()
@jit
def detect(frame):
    return detector.detect_faces(frame)
        
        
#cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture("output.mp4")

file_name = "save/log.txt"
file = open(file_name, 'w')  # truncate any previous log
file.close()

while True:
    file = open(file_name, 'a')
    start = timer()
def get_all_fnames(base_folder):
    all_fnames = glob(str(Path(base_folder, '**', '*')), recursive=True)
    all_imgs = []
    patterns = ['*jpg', '*jpeg', '*png']

    for pattern in patterns:
        match = re.compile(fnmatch.translate(pattern), re.IGNORECASE).match
        valid_pths = [pth for pth in all_fnames if match(pth)]
        all_imgs.extend(valid_pths)
                      
    return all_imgs


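# steps_threshold below the usual defaults makes the three cascade stages more
# permissive (higher recall), while min_face_size=100 skips small background
# faces and shortens the image pyramid, which also speeds up detection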
anfas_detector = MTCNN(steps_threshold=[0.4, 0.6, 0.6], min_face_size=100)

base_folder = '../example'

result_base_folder = Path(f'{base_folder}_result')
if not os.path.exists(result_base_folder):
    os.mkdir(result_base_folder)
    
all_imgs = get_all_fnames(base_folder)

print(f'Images found: {len(all_imgs)}')
for num, img_path in enumerate(tqdm(all_imgs[:])):
    try:
        if num % 100 == 0:
            with open('progress.txt', 'w') as f:
                f.write(str(num))