Example #1
def mtcnn_localize_faces(image,
                         pnet,
                         rnet,
                         onet,
                         minsize=20,
                         threshold=[0.7, 0.8, 0.85],
                         factor=0.75):
    """
    Localize faces and their landmarks in an image using MTCNN
    
    Params
    :image
    :minsize - min. face size
    :threshold - a list/array with 3 values: the thresholds for pnet, rnet and onet, respectively
    :factor - scaling factor for the image octave

    Return
    :bbs - list of bounding boxes
    :lds - list of face landmarks
    """

    image = image[:, :, 0:3]
    bounding_boxes, landmarks = detect_face.detect_face(
        image, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]

    bbs = list()
    lds = list()
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]

        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        lands = np.zeros((nrof_faces, 10), dtype=np.int32)
        landmarks = np.reshape(landmarks, (nrof_faces, 10))
        for i in range(nrof_faces):
            ## Convert to int32
            lands[i] = np.ravel(landmarks[i])
            bb[i] = np.ravel(det[i])
            # Skip boxes that fall on or outside the image borders
            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(
                    image[0]) or bb[i][3] >= len(image):
                print('face is out of range!')
                continue
            else:
                ## get as top, right, bottom, left
                bbs.append((bb[i][1], bb[i][2], bb[i][3], bb[i][0]))
                lds.append(lands[i])

    return bbs, lds
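A minimal usage sketch for the function above, assuming facenet's align.detect_face module and a TensorFlow 1.x session as used throughout these examples (the image path is a placeholder):

import tensorflow as tf
from scipy import misc
import align.detect_face as detect_face

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        # None -> load the default MTCNN weights bundled with facenet
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

image = misc.imread('group_photo.jpg')  # placeholder path
bbs, lds = mtcnn_localize_faces(image, pnet, rnet, onet)
for (top, right, bottom, left), landmarks in zip(bbs, lds):
    print('face at', (top, right, bottom, left))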
Example #2
    def detect_faces_and_keypoints(self, image):
        """
        Detect faces in an image; return face bounding boxes and face key points.
        Face boxes may be further filtered using the scores and keypoints.
        """
        bounding_boxes, markers = detect_face.detect_face(
            image, self.minsize, self.nets[0], self.nets[1], self.nets[2],
            self.threshold, self.factor)

        num_faces = bounding_boxes.shape[0]
        bboxes = []
        scores = []
        keypoints = []
        for i in range(num_faces):
            bboxes.append(bounding_boxes[i, 0:4])
            scores.append(bounding_boxes[i, 4])
            pts = []
            for k in range(5):
                pts.append((int(markers[k, i]), int(markers[5 + k, i])))
            keypoints.append(pts)

        return bboxes, scores, keypoints
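The docstring notes that boxes may be further filtered with the scores; a small sketch of such a filter (the 0.9 cutoff is an assumption, not part of the original):

def filter_detections(bboxes, scores, keypoints, min_score=0.9):
    # Keep only detections whose MTCNN confidence is at least min_score.
    kept = [(b, s, k) for b, s, k in zip(bboxes, scores, keypoints)
            if s >= min_score]
    if not kept:
        return [], [], []
    bs, ss, ks = zip(*kept)
    return list(bs), list(ss), list(ks)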
Example #3
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)
    
    print('Creating networks and loading parameters')
    
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        configs_ =  tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        configs_.gpu_options.allow_growth = True
        sess = tf.Session(config=configs_)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
    
    minsize = 20 # minimum size of face
    threshold = [ 0.6, 0.7, 0.7 ]  # thresholds for the three cascade stages
    factor = 0.709 # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
    
    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
            # Shuffle regardless of whether the class directory already existed
            if args.random_order:
                random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename+'.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim<2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:,:,0:3]
    
                        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces>0:
                            det = bounding_boxes[:,0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces>1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
                                    offset_dist_squared = np.sum(np.power(offsets,2.0),0)
                                    index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
                                    det_arr.append(det[index,:])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0]-args.margin/2, 0)
                                bb[1] = np.maximum(det[1]-args.margin/2, 0)
                                bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
                                bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
                                cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            
    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
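main() reads its settings from an argparse namespace; a sketch of a matching parser, with argument names inferred from the attributes used above and defaults that are assumptions:

def parse_arguments(argv):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str,
                        help='Directory with unaligned images.')
    parser.add_argument('output_dir', type=str,
                        help='Directory for aligned face thumbnails.')
    parser.add_argument('--image_size', type=int, default=182,
                        help='Image size (height, width) in pixels.')
    parser.add_argument('--margin', type=int, default=44,
                        help='Margin around the detected box, in pixels.')
    parser.add_argument('--random_order', action='store_true',
                        help='Shuffle the order of images.')
    parser.add_argument('--gpu_memory_fraction', type=float, default=1.0)
    parser.add_argument('--detect_multiple_faces', type=bool, default=False)
    return parser.parse_args(argv)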
Example #4
    def mtcnn(self,
              image,
              image_size=182,
              margin=44,
              gpu_memory_fraction=1.0,
              detect_multiple_faces=False):
        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # thresholds for the three cascade stages
        factor = 0.709  # scale factor

        nrof_successfully_aligned = 0

        img = image
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        img = img[:, :, 0:3]

        bounding_boxes, _ = detect_face.detect_face(
            img, minsize, self.pnet, self.rnet, self.onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        if nrof_faces > 0:
            det = bounding_boxes[:, 0:4]
            det_arr = []
            img_size = np.asarray(img.shape)[0:2]
            if nrof_faces > 1:
                if detect_multiple_faces:
                    for i in range(nrof_faces):
                        det_arr.append(np.squeeze(det[i]))
                else:
                    # Keep the face that is both large and close to the image
                    # center, with some extra weight on the centering
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (
                        det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    index = np.argmax(
                        bounding_box_size - offset_dist_squared * 2.0)
                    det_arr.append(det[index, :])
            else:
                det_arr.append(np.squeeze(det))

            for i, det in enumerate(det_arr):
                det = np.squeeze(det)
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                scaled = np.array(
                    Image.fromarray(cropped).resize(size=(image_size,
                                                          image_size)))
                nrof_successfully_aligned += 1

        if nrof_successfully_aligned != 0:
            self.detection = True
            return scaled, bb
        else:
            self.detection = False
            return None, None
Example #5
def main():
    if ARGS.test == 'train':
        train(ARGS)
    elif ARGS.test == 'gen':
        gen(ARGS)
    elif ARGS.test == 'predict':
        predict(ARGS)
    elif ARGS.test == 'server':
        server_start(ARGS)
    elif ARGS.test == 'server_production':
        server_start(ARGS, ARGS.port)
    elif ARGS.test == 'hnm':
        hnm(ARGS)
    elif ARGS.test == 'val':
        val(ARGS)
    elif ARGS.test == 'fer':
        fer(ARGS)

    elif ARGS.test == 'profiling':
        profiling(ARGS)
    elif ARGS.test == 'facenet':
        print(facenet)

        mtcnn = tf.Session()
        pnet, rnet, onet = FaceDetector.create_mtcnn(mtcnn, None)

        # Load the model
        t_ = time.time()
        print('Loading model...')
        #fnet = facenet.load_model('../models/facenet/20180204-160909') # squeezenet
        fnet = facenet.load_model(
            '../models/facenet/20170512-110547.pb')  # InceptionResnet V1
        t_ = time.time() - t_
        print('done', t_ * 1000)

        stats = {
            'same': {
                'd_avg': 0.,
                'd_max': -9999.,
                'd_min': 9999.,
                'sqr_avg': 0.,
                'count': 0,
            },
            'diff': {
                'd_avg': 0.,
                'd_max': -9999.,
                'd_min': 9999.,
                'sqr_avg': 0.,
                'count': 0,
            },
            'precision': {},
            'timing': {
                'count': 0,
                'forward': 0.
            }
        }

        if True:
            # Get input and output tensors
            emb = None
            names = []
            for iteration in range(16):
                # Load faces from LFW dataset and parse their names from path to group faces
                images = []
                batch_size = 128
                fid = 0
                for i in range(batch_size):
                    f = DirectoryWalker().get_a_file(
                        directory='../data/face/lfw', filters=['.jpg'])
                    if f is None or not f.path:
                        break

                    n = os.path.split(os.path.split(f.path)[0])[1]
                    #print('name', n)
                    n = abs(hash(n)) % (10**8)

                    img = cv2.imread(f.path, 1)
                    extents, landmarks = FaceDetector.detect_face(
                        img / 255.,
                        120,
                        pnet,
                        rnet,
                        onet,
                        threshold=[0.6, 0.7, 0.9],
                        factor=0.6,
                        interpolation=cv2.INTER_LINEAR)

                    for j, e in enumerate(extents):
                        x1, y1, x2, y2, confidence = e.astype(dtype=np.int)
                        #print(len(landmarks[j]))
                        #cropped = img[int(x1):int(x2), int(y1):int(y2), :]
                        aligned = FaceApplications.align_face(img,
                                                              landmarks[j],
                                                              intensity=1.,
                                                              sz=160,
                                                              ortho=True,
                                                              expand=1.5)
                        #cv2.imwrite('../data/face/mtcnn_cropped/'+str(fid).zfill(4)+'.jpg', aligned)

                        images.append(aligned / 255.)
                        names.append(n)
                        """debug = aligned.astype(dtype=np.int)
                        print('debug', debug)
                        for p in debug:
                            cv2.circle(img, (p[0], p[1]), 2, (255, 0, 255))

                        for p in landmarks[j]:
                            cv2.circle(img, (p[0], p[1]), 2, (255, 255, 0))"""

                        fid += 1

                    #cv2.imwrite('../data/face/mtcnn_cropped/'+str(i).zfill(4)+'-annotated.jpg', img)

                # Run forward pass to calculate embeddings
                if len(images):
                    t_ = time.time()
                    if emb is None:
                        emb = fnet(images)
                    else:
                        emb = np.concatenate((emb, fnet(images)))
                        #emb = emb + sess.run(embeddings, feed_dict=feed_dict)
                    t_ = time.time() - t_
                    stats['timing']['count'] += len(images)
                    stats['timing']['forward'] += t_ * 1000
                    print('forward', emb.shape, t_ * 1000)
                    print()

            print()
            print('avg. forward time:',
                  stats['timing']['forward'] / stats['timing']['count'])

            # Test distance
            samples = sklearn.preprocessing.normalize(emb)
            for i1, s1 in enumerate(samples):
                for i2, s2 in enumerate(samples):
                    if i1 != i2:
                        d_ = scipy.spatial.distance.cosine(s1, s2)

                        if names[i1] == names[i2]:
                            # Same person as annotated by LFW
                            cate = 'same'
                        else:  # Different person
                            cate = 'diff'
                        c_ = stats[cate]['count']
                        stats[cate]['d_avg'] = stats[cate]['d_avg'] * c_ / (
                            c_ + 1) + d_ / (c_ + 1)
                        d_sqr = d_ * d_
                        stats[cate]['sqr_avg'] = stats[cate][
                            'sqr_avg'] * c_ / (c_ + 1) + d_sqr / (c_ + 1)
                        if d_ > stats[cate]['d_max']:
                            stats[cate]['d_max'] = d_
                        if d_ < stats[cate]['d_min']:
                            stats[cate]['d_min'] = d_
                        stats[cate]['count'] += 1

                        # Get statistics of precision on different thresholds
                        increments = 64
                        for t_ in range(increments):
                            threshold = 0.2 + t_ * (0.6 / increments)
                            if threshold not in stats['precision']:
                                stats['precision'][threshold] = {
                                    'correct': 0,
                                    'total': 0,
                                    'precision': 0.,
                                    'true_pos': 0,
                                    'total_pos': 0,
                                    'recall': 0.,
                                }
                            if (cate == 'same' and d_ <= threshold) or (
                                    cate == 'diff' and d_ > threshold):
                                stats['precision'][threshold]['correct'] += 1
                            if cate == 'same':
                                if d_ <= threshold:
                                    stats['precision'][threshold][
                                        'true_pos'] += 1
                                stats['precision'][threshold]['total_pos'] += 1
                                stats['precision'][threshold][
                                    'recall'] = stats['precision'][threshold][
                                        'true_pos'] / stats['precision'][
                                            threshold]['total_pos']
                            stats['precision'][threshold]['total'] += 1
                            stats['precision'][threshold]['precision'] = stats[
                                'precision'][threshold]['correct'] / stats[
                                    'precision'][threshold]['total']
            """tree = scipy.spatial.KDTree(samples)
            for i, s in enumerate(samples):
                print(i, tree.query(s))"""

        for cate in ['same', 'diff']:
            # stddev is the square root of Var(X) = E[X^2] - E[X]^2
            stats[cate]['stddev'] = np.sqrt(
                max(stats[cate]['sqr_avg'] -
                    stats[cate]['d_avg'] * stats[cate]['d_avg'], 0.))
        print()
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(stats)

        # Print precision vs recall
        print()
        print('threshold,recall,precision')
        for t in stats['precision']:
            t_stat = stats['precision'][t]
            print(
                str(t) + ',' + str(t_stat['recall']) + ',' +
                str(t_stat['precision']))

    elif ARGS.test == 'align':
        face_app = FaceApplications()
        face_app.align_dataset()
    elif ARGS.test == 'fxpress':
        fxpress(ARGS)
    elif ARGS.test == 'fxpress_train':
        fxpress_train(ARGS)
    elif ARGS.test == 'emoc':
        classifier = EmotionClassifier()
        classifier.build_network(ARGS)
        classifier.val(ARGS)
    elif ARGS.test == 'face_app':
        face_app = FaceApplications()
        face_app.detect()
    elif ARGS.test == 'face_benchmark':  # Test different parameters, resolutions, interpolation methods for MTCNN face detection time vs precision
        interpolations = ['NEAREST', 'LINEAR', 'AREA']
        resolutions = [256, 320, 384, 448, 512, 640, 1024, 1280]
        factors = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

        expectations = [
            ['0001.jpg', 4],
            ['0002.jpg', 1],
            ['0003.jpg', 1],
            ['0004.jpg', 0],
            ['0005.jpg', 1],
            ['0006.jpg', 1],
            ['0007.jpg', 59],
            ['0008.jpg', 6],
            ['0009.jpg', 1],
            ['0010.jpg', 5],
            ['0011.jpg', 4],
            ['0012.jpg', 17],
            ['0013.jpg', 20],
            ['0014.jpg', 48],
            ['0015.jpg', 22],
        ]

        log_file = open('../data/face_benchmark.csv', 'w')
        log_file.write('time,precision_index,cp,options\n')

        for interp in interpolations:
            for res_cap in resolutions:
                for factor in factors:
                    #res_cap = resolutions[0]
                    #factor = factors[0]
                    #interp = interpolations[0]
                    options = {
                        'res_cap': res_cap,
                        'factor': factor,
                        'interp': interp,
                    }
                    if interp == 'NEAREST':
                        interpolation = cv2.INTER_NEAREST
                    elif interp == 'LINEAR':
                        interpolation = cv2.INTER_LINEAR
                    elif interp == 'AREA':
                        interpolation = cv2.INTER_AREA

                    iterations = 20
                    file_count = len(expectations)
                    time_sampling = np.zeros((file_count, iterations),
                                             dtype=np.float)
                    pi_sampling = np.zeros((file_count, iterations),
                                           dtype=np.float)

                    image_dir = '../data/face_benchmark/'
                    for k, item in enumerate(expectations):
                        filename, expected_faces = item
                        inpath = image_dir + filename
                        img = cv2.imread(inpath, 1)
                        if img is None:
                            break

                        img, scaling = ImageUtilities.fit_resize(
                            img,
                            maxsize=(res_cap, res_cap),
                            interpolation=interpolation)
                        retval, bindata = cv2.imencode('.jpg', img)
                        bindata_b64 = base64.b64encode(bindata).decode()

                        requests = {
                            'requests': [{
                                'requestId':
                                str(uuid.uuid1()),
                                'media': {
                                    'content': bindata_b64
                                },
                                'services': [{
                                    'type': 'face_',
                                    'model': 'a-emoc',
                                    'options': options
                                }]
                            }],
                            'timing': {
                                'client_sent': time.time()
                            }
                        }

                        for i in range(iterations):
                            #url = 'http://10.129.11.4/cgi/predict'
                            requests['timing']['client_sent'] = time.time()
                            url = 'http://192.168.41.41:8080/predict'
                            postdata = json.dumps(requests)
                            #print()
                            #print(postdata)
                            #print()
                            request = Request(url, data=postdata.encode())
                            response = json.loads(
                                urlopen(request).read().decode())
                            timing = response['timing']
                            server_time = timing['server_sent'] - timing[
                                'server_rcv']
                            #print('server time:', server_time)
                            total_time = (time.time() -
                                          timing['client_sent']) * 1000
                            client_time = total_time - server_time
                            print('response time:', total_time)
                            pi = 0.
                            for r_ in response['requests']:
                                for s_ in r_['services']:
                                    rects_ = s_['results']['rectangles']
                                    if expected_faces:
                                        pi = len(rects_) / expected_faces
                                    elif len(rects_):
                                        pi = expected_faces / len(rects_)
                                    else:
                                        pi = 1.0
                                    #print('faces detected:', len(rects_), pi)
                            time_sampling[k][i] = total_time
                            pi_sampling[k][i] = pi

                            #time.sleep(0.5)

                            #print()
                            #print(response)
                            #print()

                    time_mean = np.mean(time_sampling)
                    pi_mean = np.mean(pi_sampling) * 100
                    cp = pi_mean * pi_mean / time_mean
                    print(time_mean, pi_mean)
                    print()
                    log_file.write(','.join([
                        str(time_mean),
                        str(pi_mean),
                        str(cp),
                        json.dumps(options)
                    ]) + '\n')
                    log_file.flush()

        log_file.close()
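The cp column written above is a combined score (squared mean precision index over mean response time); a sketch for picking the best configuration from the log. Note the options field is raw JSON that itself contains commas, so each line is split on the first three commas only:

with open('../data/face_benchmark.csv') as f:
    next(f)  # skip the header line
    rows = [line.rstrip('\n').split(',', 3) for line in f]
best = max(rows, key=lambda row: float(row[2]))  # row = [time, pi, cp, options]
print('best cp:', best[2], 'with options', best[3])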
Example #6
File: app.py Project: yixie-tony/FlaskAI
def age_gender():

    sample_url = url_for('uploaded_file', filename="sample.jpeg")

    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template("face.html",
                                   error_msg="No file has been selected!")
        file = request.files['file']
        if file and allowed_file(file.filename):

            with tf.Graph().as_default():
                sess = tf.Session()
                with sess.as_default():
                    pnet, rnet, onet = mtcnn.create_mtcnn(
                        sess, MTCNN_MODEL_PATH)

            filename = file.filename
            file_name = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(file_name)

            start = time.time()

            # Resize the original image to 680 px width and save as JPEG
            cv_ori = cv2.imread(file_name)
            cv_resize = cv2.resize(
                cv_ori,
                dsize=(680, int(cv_ori.shape[0] / cv_ori.shape[1] * 680)),
                interpolation=cv2.INTER_CUBIC)
            cv2.imwrite(file_name, cv_resize)

            # Read the image for face detection
            img = misc.imread(file_name)
            cv_img = cv2.imread(file_name)

            # Detect faces and landmarks by MTCNN
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)

            # Crop the aligned faces for age and gender classification
            aligned_images = gender_age_predict.load_image(cv_img, landmarks)

            # Estimate gender and age of each face using ResNet50
            genders, ages = gender_age_predict.inception(
                FROZEN_GRAPH_PATH, aligned_images)

            # Draw boxes and labels for faces
            gender_age_predict.draw_label(cv_img, bounding_boxes, genders,
                                          ages)

            save_path = os.path.join(app.config['NEW_FOLDER'], filename)
            cv2.imwrite(save_path, cv_img)

            end = time.time()

            print('\n Evaluation time: {:.3f}s\n'.format(end - start))
            file_url = url_for('uploaded_file', filename=filename)
            return render_template("face.html",
                                   user_image=file_url,
                                   error_msg='')
        else:
            print('\n Incorrect upload image format.\n')
            return render_template("face.html",
                                   user_image=sample_url,
                                   error_msg='Incorrect image format!')
    return render_template("face.html", user_image=sample_url, error_msg='')
Example #7
    def detect_faces(self, image):
        with self.graph.as_default():
            with self.sess.as_default():
                boxes, points = detect_face.detect_face(
                    image, self.minsize, self.pnet, self.rnet, self.onet,
                    self.threshold, self.factor)
        return self._assemble_detect_face_result(boxes, points)
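The method assumes the surrounding object already holds the graph, the session, the three networks, and the detection parameters; a hypothetical constructor consistent with the attributes used above:

class MtcnnFaceDetector:
    def __init__(self, minsize=20, threshold=(0.6, 0.7, 0.7), factor=0.709):
        self.minsize = minsize
        self.threshold = list(threshold)
        self.factor = factor
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf.Session()
            with self.sess.as_default():
                self.pnet, self.rnet, self.onet = \
                    detect_face.create_mtcnn(self.sess, None)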
Example #8
def cam_mtcnn(draw):

    minsize = 40  # minimum size of face
    threshold = [0.6, 0.7, 0.9]  # thresholds for the three cascade stages
    factor = 0.709  # scale factor

    #draw = cv2.resize(draw, (960, 540))
    #img=cv2.cvtColor(draw,cv2.COLOR_BGR2GRAY)
    original = draw.copy()
    bounding_boxes, points = detect_face.detect_face(draw, minsize, pnet, rnet,
                                                     onet, threshold, factor)

    nrof_faces = bounding_boxes.shape[0]

    h, w, _ = draw.shape  # image shape is (height, width, channels)

    face = []

    for b in bounding_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (0, 255, 0))

        # [y1:y2, x1:x2]
        zeros = np.zeros((h, w))

        face_h, face_w = zeros[int(b[1]):int(b[3]), int(b[0]):int(b[2])].shape

        ones = np.ones((face_h, face_w))

        zeros[int(b[1]):int(b[3]), int(b[0]):int(b[2])] = ones
        face_mask = zeros

        face = original[int(b[1]):int(b[3]), int(b[0]):int(b[2])]

    if len(points) != 0 and len(face) != 0:
        for p in points.T:
            for i in range(5):
                cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

            # print (points.shape)
            size = 30
            i = 0
            cv2.rectangle(draw, (int(p[i] - size), int(p[i + 5] - size)),
                          (int(p[i] + size), int(p[i + 5] + size)),
                          (0, 255, 0))

            left_eye = [p[i], p[i + 5]]
            eye_left = original[int(p[i + 5] - size):int(p[i + 5] + size),
                                int(p[i] - size):int(p[i] + size)]

            i = 1
            cv2.rectangle(draw, (int(p[i] - size), int(p[i + 5] - size)),
                          (int(p[i] + size), int(p[i + 5] + size)),
                          (0, 255, 0))

            right_eye = [p[i], p[i + 5]]
            eye_right = original[int(p[i + 5] - size):int(p[i + 5] + size),
                                 int(p[i] - size):int(p[i] + size)]

        # cv2.namedWindow('Face Detection',cv2.WINDOW_NORMAL)
        # cv2.resizeWindow('Face Detection', 1920, 1080)

        # cv2.imshow('Face Detection',draw)
        # disp_img("face", face)
        # disp_img("eye_left", eye_left)
        # disp_img("eye_right", eye_right)
        # disp_img("face_mask", face_mask)
        # disp_img("original", original)
        #
        # cv2.waitKey(0)
        return [
            original, draw, face, eye_left, eye_right, face_mask, left_eye,
            right_eye
        ]
    else:
        return []
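cam_mtcnn relies on module-level pnet/rnet/onet (created as in the earlier examples); a sketch of feeding it webcam frames with OpenCV:

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    results = cam_mtcnn(frame)
    if results:
        original, annotated, face, eye_left, eye_right, face_mask, \
            left_eye, right_eye = results
        cv2.imshow('Face Detection', annotated)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()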
Example #9
def convert_to_aligned_face(data_set, base_path, dataset_name):

    file_name = data_set.file_name
    genders = data_set.gender
    ages = data_set.age
    face_score = data_set.score
    num_images = data_set.shape[0]

    if dataset_name == "imdb":
        data_base_dir = os.path.join(base_path, "imdb_crop")
    elif dataset_name == "wiki":
        data_base_dir = os.path.join(base_path, "wiki_crop")
    else:
        raise NameError("unknown dataset name: " + dataset_name)

    # load the mtcnn face detector
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = mtcnn.create_mtcnn(sess, MTCNN_MODEL_PATH)

    error_count = 0
    write_count = 0

    for index in range(num_images):

        if face_score[index] < 0.75:
            continue
        if not (0 <= ages[index] <= 100):  # was '~', bitwise NOT, which never filters
            continue
        if np.isnan(genders[index]):
            continue

        try:
            # Read the image for face detection
            img = misc.imread(
                os.path.join(data_base_dir, str(file_name[index][0])))
            cv_img = cv2.imread(
                os.path.join(data_base_dir, str(file_name[index][0])),
                cv2.IMREAD_COLOR)

            # Detect faces for age and gender classification
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)

            if bounding_boxes.shape[0] != 1:
                continue
            else:
                # Crop aligned faces from image
                aligned_faces = load_image(cv_img, landmarks)
                face = aligned_faces[0]

                # Resize and write image to path
                output_dir = os.getcwd() + '/faces/' + dataset_name + '/'
                if os.path.isdir(output_dir):
                    pass
                else:
                    os.mkdir(output_dir)

                image_name = 'image{}_{}_{}.jpg'.format(
                    index + 70000, int(genders[index]), ages[index])
                output_path = output_dir + image_name
                cv2.imwrite(output_path, face)
                write_count = write_count + 1

        except Exception:  # some files seem to be missing from the face data dir
            error_count = error_count + 1
            print("read {} error".format(index + 1))
    print("There are ", error_count, " missing pictures")
    print("Found", write_count, "valid faces")
Example #10
    def detect(self, media):
        timing = dict()
        result = dict({
            'mtcnn': list(),
            'mtcnn_5p': list(),
            'emotions': list(),
        })
        #print(media)
        src_img = None
        if 'content' in media:
            bindata = base64.b64decode(media['content'].encode())
            src_img = cv2.imdecode(np.frombuffer(bindata, np.uint8), 1)

        if src_img is not None:
            #print(img.shape)
            src_shape = src_img.shape

            time_start = time.time()
            gray = ImageUtilities.preprocess(src_img,
                                             convert_gray=cv2.COLOR_RGB2YCrCb,
                                             equalize=False,
                                             denoise=False,
                                             maxsize=384)
            time_diff = time.time() - time_start
            timing['preprocess'] = time_diff * 1000
            #print('preprocess', time_diff)

            processed_shape = gray.shape
            mrate = [
                processed_shape[0] / src_shape[0],
                processed_shape[1] / src_shape[1]
            ]

            time_start = time.time()
            rects, landmarks = FaceDetector().detect(gray)
            time_diff = time.time() - time_start
            timing['detect'] = time_diff * 1000
            #print('hog+svm detect', time_diff)

            time_start = time.time()
            facelist = list()
            rects_ = list()
            predictions_ = list()
            # Crop faces from source image
            for rect in rects:
                face = None
                (x, y, w, h) = ImageUtilities.rect_to_bb(rect, mrate=mrate)
                height, width, *rest = src_img.shape
                (x, y, w,
                 h) = ImageUtilities.rect_fit_ar([x, y, w, h],
                                                 [0, 0, width, height],
                                                 1.,
                                                 mrate=1.)
                if w > 0 and h > 0:
                    face = ImageUtilities.transform_crop((x, y, w, h),
                                                         src_img,
                                                         r_intensity=0.,
                                                         p_intensity=0.)
                    face = imresize(face, shape_raw[0:2])
                    #face = ImageUtilities.preprocess(face, convert_gray=None)
                if face is not None:
                    facelist.append(face)
                    rects_.append([x, y, w, h])
                    predictions_.append([0., 0.])
            val_data = np.array(facelist, dtype=np.float32) / 255
            reshaped = val_data.reshape((-1, ) + shape_flat)
            time_diff = time.time() - time_start
            timing['crop'] = time_diff * 1000
            #print('prepare data for cnn', time_diff)

            # MTCNN
            img = ImageUtilities.preprocess(src_img,
                                            convert_gray=None,
                                            equalize=False,
                                            denoise=False,
                                            maxsize=384)
            mrate_ = src_shape[0] / img.shape[0]
            time_start = time.time()
            minsize = 40  # minimum size of face
            threshold = [0.6, 0.7, 0.9]  # thresholds for the three cascade stages
            factor = 0.709  # scale factor
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            bounding_boxes, points = mtcnn_detect.detect_face(
                img, minsize, self.pnet, self.rnet, self.onet, threshold,
                factor)

            if len(bounding_boxes):
                points = np.array(points) * mrate_
                points = points.reshape(2, -1)
                points = np.transpose(points)
                points = points.reshape((len(bounding_boxes), -1, 2))

            time_diff = time.time() - time_start
            timing['mtcnn'] = time_diff * 1000
            timing['emotion'] = 0
            print()
            print()
            print('result len', len(bounding_boxes), len(points))
            nrof_faces = bounding_boxes.shape[0]
            for i, b in enumerate(bounding_boxes):
                r_ = (np.array([b[0], b[1], b[2] - b[0], b[3] - b[1]]) *
                      mrate_).astype(dtype=np.int).tolist()
                result['mtcnn'].append(r_ + [
                    int(b[4] * 1000),
                ])
                result['mtcnn_5p'].append(
                    points[i].astype(dtype=np.int).tolist())
                #rects_.append(r_)
                #predictions_.append([0., 2.])

                # Facial Expression
                time_start = time.time()
                (x, y, w, h) = ImageUtilities.rect_fit_ar(
                    r_, [0, 0, src_shape[1], src_shape[0]], 1., crop=False)
                face = None  # reset so a stale crop from a previous pass is never reused
                if w > 0 and h > 0:
                    face = ImageUtilities.transform_crop((x, y, w, h),
                                                         src_img,
                                                         r_intensity=0.,
                                                         p_intensity=0.)
                    #face = imresize(face, shape_raw[0:2])
                    #face = ImageUtilities.preprocess(face, convert_gray=cv2.COLOR_RGB2YCrCb, equalize=False, denoise=False)
                    face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
                    cv2.imwrite('./face.jpg', face)
                    #face = np.array(face, dtype=np.float32)/255
                    face = cv2.resize(face, (48, 48),
                                      interpolation=cv2.INTER_CUBIC) / 255.
                if face is not None:
                    #emotions = self.fxpress.predict(face)[0]
                    emotions = self.emoc.predict(face)
                    print('emotion', face.shape, emotions)
                    result['emotions'].append(
                        (np.array(emotions) *
                         1000).astype(dtype=np.int).tolist())
                time_diff = time.time() - time_start
                timing['emotion'] += time_diff * 1000

                print('rect', b, r_)
            print('emotions', result['emotions'])
            print('mtcnn_5p', result['mtcnn_5p'])
            print()
            print()

            # Self-trained cascade face detection
            img = ImageUtilities.preprocess(src_img,
                                            convert_gray=None,
                                            equalize=False,
                                            denoise=False,
                                            maxsize=384)
            ms_rects, ms_predictions, ms_timing = self.multi_scale_detection(
                img, expanding_rate=1.2, stride=12)
            mrate_ = src_shape[0] / img.shape[0]
            timing['cnn'] = ms_timing['cnn']
            timing['window_count'] = ms_timing['window_count']

            use_nms = True
            if len(ms_predictions):
                if use_nms:
                    # Apply non-maximum-supression
                    scores = np.array(
                        ms_predictions)[:,
                                        target_class:target_class + 1].reshape(
                                            (-1, ))
                    nms = tf.image.non_max_suppression(np.array(ms_rects),
                                                       scores,
                                                       iou_threshold=0.5,
                                                       max_output_size=99999)
                    for index, value in enumerate(nms.eval()):
                        r_ = (np.array(ms_rects[value]) *
                              mrate_).astype(dtype=np.int).tolist()
                        p_ = ms_predictions[value]
                        rects_.append(r_)
                        predictions_.append(p_)
                else:
                    for index, p_ in enumerate(ms_predictions):
                        r_ = (np.array(ms_rects[index]) *
                              mrate_).astype(dtype=np.int).tolist()
                        rects_.append(r_)
                        predictions_.append(p_)
            """time_start = time.time()
            feed_dict = {self.model.x: val_data.reshape((-1,)+shape_flat)}
            predictions = self.model.sess.run(self.model.y, feed_dict)
            time_diff = time.time() - time_start
            timing['cnn'] = time_diff*1000
            #print('cnn classify', time_diff, len(facelist))
            #print('predictions', predictions)"""

            return (rects_, predictions_, timing, result)
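detect() expects a media dict carrying base64-encoded image bytes; a sketch of invoking it (the file path and the 'service' holder object are placeholders):

import base64

with open('photo.jpg', 'rb') as f:  # placeholder image path
    media = {'content': base64.b64encode(f.read()).decode()}
rects, predictions, timing, result = service.detect(media)
print('mtcnn boxes:', result['mtcnn'])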