def __init__(self, args):
    model = edict()
    with tf.Graph().as_default():
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.2
        sess = tf.Session(config=config)
        #sess = tf.Session()
        with sess.as_default():
            self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(sess, None)
    self.threshold = args.threshold
    self.det_minsize = 50
    self.det_threshold = [0.4, 0.6, 0.6]
    self.det_factor = 0.9
    _vec = args.image_size.split(',')
    assert len(_vec) == 2
    self.image_size = (int(_vec[0]), int(_vec[1]))
    _vec = args.model.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    self.model = edict()
    self.model.ctx = mx.gpu(args.gpu)
    self.model.sym, self.model.arg_params, self.model.aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.model.arg_params, self.model.aux_params = ch_dev(self.model.arg_params, self.model.aux_params, self.model.ctx)
    all_layers = self.model.sym.get_internals()
    self.model.sym = all_layers['fc1_output']
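# A minimal sketch (not part of the original class) of how the checkpoint loaded
# above could be bound into an MXNet Module to produce 'fc1_output' embeddings.
# The helper name `get_embedding` and the input shape (1, 3, 112, 112) are
# assumptions for illustration only.
from collections import namedtuple
import mxnet as mx
import numpy as np

Batch = namedtuple('Batch', ['data'])

def get_embedding(model, rgb_image_112x112):
    # model is the edict built in __init__ (sym, ctx, arg_params, aux_params)
    module = mx.mod.Module(symbol=model.sym, context=model.ctx, label_names=None)
    module.bind(data_shapes=[('data', (1, 3, 112, 112))], for_training=False)
    module.set_params(model.arg_params, model.aux_params)
    # HWC uint8 image -> NCHW float batch of one
    data = np.transpose(rgb_image_112x112, (2, 0, 1))[np.newaxis, :]
    module.forward(Batch([mx.nd.array(data)]), is_train=False)
    return module.get_outputs()[0].asnumpy().flatten()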
def Augmentation(input_image, label): with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy') sometimes = lambda aug: iaa.Sometimes(0.5, aug) aug_name = input_image.split("/")[-1].split(".")[0] minsize = 35 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 image_size = 200 nb_batches = 16 aug_label = [label] * nb_batches aug_faces = [] batches = [] seq = iaa.Sequential( [ iaa.Fliplr(0.5), sometimes( iaa.CropAndPad( percent=(-0.05, 0.1), pad_mode=ia.ALL, pad_cval=(0, 255))), sometimes( iaa.Affine(scale={ "x": (0.8, 1.0), "y": (0.8, 1.0) }, translate_percent={ "x": (-0.2, 0.2), "y": (0, 0.2) }, rotate=(-10, 10), shear=(-16, 16), order=[0, 1], cval=(0, 255))), iaa.SomeOf( (0, 4), [ iaa.OneOf([ iaa.GaussianBlur((0, 3.0)), iaa.AverageBlur(k=(2, 7)), iaa.MedianBlur(k=(3, 11)), ]), iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images iaa.Emboss(alpha=(0, 1.0), strength=(0, 1.0)), # emboss images iaa.SimplexNoiseAlpha( iaa.OneOf([ iaa.EdgeDetect(alpha=(0.2, 0.5)), iaa.DirectedEdgeDetect(alpha=(0.2, 0.5), direction=(0.0, 1.0)), ])), iaa.AdditiveGaussianNoise( loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5), iaa.Dropout((0.01, 0.1), per_channel=0.5), iaa.Add((-10, 10), per_channel=0.5), iaa.AddToHueAndSaturation((-20, 20)), iaa.ContrastNormalization((0.5, 1.5), per_channel=0.5), iaa.Grayscale(alpha=(0.0, 1.0)), sometimes( iaa.ElasticTransformation(alpha=(0.5, 2), sigma=0.25)), sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.03))), sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1))) ], random_order=True) ], random_order=True) img = misc.imread(input_image) if img.ndim < 2: print("Unable !") elif img.ndim == 2: img = facenet.to_rgb(img) img = img[:, :, 0:3] batches.append(np.array([img for _ in range(nb_batches)], dtype=np.uint8)) aug_images = seq.augment_images(batches[0]) for aug_img in aug_images: bounding_boxes, _ = detect_face.detect_face(aug_img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) det = det[index, :] det = np.squeeze(det) bb_temp = np.zeros(4, dtype=np.int32) bb_temp[0] = det[0] bb_temp[1] = det[1] bb_temp[2] = det[2] bb_temp[3] = det[3] cropped_temp = aug_img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear') aug_faces.append(scaled_temp) return (aug_label, aug_faces)
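# A minimal usage sketch for Augmentation above; the image path and label are
# hypothetical, and writing the crops with scipy.misc mirrors the other snippets.
from scipy import misc

aug_labels, aug_faces = Augmentation('./data/raw/person_a/img_0001.jpg', 'person_a')
for idx, face in enumerate(aug_faces):
    misc.imsave('./data/aug/person_a_%02d.png' % idx, face)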
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy')

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        HumanNames = os.listdir("./input_dir")
        HumanNames.sort()

        print('Loading feature extraction model')
        modeldir = './pre_model/20170511-185253.pb'
def load_and_align_data(image_files, image_size, margin, gpu_memory_fraction): # TODO: set the parameters (minsize, threshold, scale factor) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor print('Creating networks and loading parameters') # TODO: create MT-CNN (P-net, R-net, O-net) with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) path_exp = os.path.expanduser(image_files) image_paths = facenet.get_image_paths(path_exp) nrof_samples = len(image_paths) img_list = [None] * nrof_samples for i in range(nrof_samples): # TODO: face detection and alignment img = cv2.imread(image_paths[i]) bounding_boxes, landmarks = detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] landmark = landmarks img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering det = det[index, :] landmark = landmark[:, index] det = np.squeeze(det) landmark = np.squeeze(landmark) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - margin / 2, 0) bb[1] = np.maximum(det[1] - margin / 2, 0) bb[2] = np.minimum(det[2] + margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] aligned = face_alignment(cropped, image_size, landmark) prewhitened = facenet.prewhiten(aligned) img_list[i] = prewhitened images = np.stack(img_list) return images
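# A minimal usage sketch for load_and_align_data above; the directory, margin and
# GPU memory fraction are hypothetical. The result is a stacked, prewhitened batch
# that can be fed to a FaceNet-style embeddings tensor.
images = load_and_align_data('~/datasets/test_faces', image_size=160, margin=44, gpu_memory_fraction=0.5)
print(images.shape)  # (num_images, 160, 160, 3)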
for x in file_list:
    f.write(str(x[0]) + " " + str(x[1]) + " " + str(x[2]) + '\n')
    print(x)
print("file saved")
os.system('../src/send.sh ' + output_name)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options, log_device_placement=True))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, '../parameter/det/')

        minsize = 35  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        #humans_dir = '../image/'
        #humans_dir = '../src/lsg_mtcnn_160/'
        humans_dir = '../src/mtcnnpy_160/'
        humans_dir = facenet.get_dataset(humans_dir)
        HumanNames = []
        Human_hash = dict()
def collect_data(self): output_dir = os.path.expanduser(self.output_datadir) if not os.path.exists(output_dir): os.makedirs(output_dir) dataset = facenet.get_dataset(self.input_datadir) with tf.Graph().as_default(): gpu_options = tf.compat.v1.GPUOptions( per_process_gpu_memory_fraction=0.5) sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto( gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy') minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 image_size = 182 # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join( output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext( os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename + '.png') print("Image: %s" % image_path) if not os.path.exists(output_filename): try: img = imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) print('to_rgb data dimension: ', img.ndim) img = img[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('No of Detected Face: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = ( det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum( np.power(offsets, 2.0), 0) index = np.argmax( bounding_box_size - offset_dist_squared * 2.0 ) # some extra weight on the centering det = det[index, :] det = np.squeeze(det) bb_temp = np.zeros(4, dtype=np.int32) bb_temp[0] = det[0] bb_temp[1] = det[1] bb_temp[2] = det[2] bb_temp[3] = det[3] cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] scaled_temp = resize(cropped_temp, (image_size, image_size)) nrof_successfully_aligned += 1 imageio.imwrite(output_filename, scaled_temp) text_file.write( '%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) else: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) return (nrof_images_total, nrof_successfully_aligned)
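# A minimal usage sketch for collect_data above. The method lives on a class that
# exposes input_datadir / output_datadir; the class name `preprocesses` and the
# directory paths here are assumptions for illustration only.
obj = preprocesses('./train_img', './aligned_img')
nrof_total, nrof_aligned = obj.collect_data()
print('Total images: %d, successfully aligned: %d' % (nrof_total, nrof_aligned))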
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = face_image.get_dataset(args.name, args.input_dir) print('dataset size', args.name, len(dataset)) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor #image_size = [112,96] image_size = [112,112] src = np.array([ [30.2946, 51.6963], [65.5318, 51.5014], [48.0252, 71.7366], [33.5493, 92.3655], [62.7299, 92.2041] ], dtype=np.float32 ) if image_size[1]==112: src[:,0] += 8.0 # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) #output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) output_filename = os.path.join(args.output_dir, 'lst') with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof = np.zeros( (5,), dtype=np.int32) for fimage in dataset: if nrof_images_total%100==0: print("Processing %d, (%s)" % (nrof_images_total, nrof)) nrof_images_total += 1 #if nrof_images_total<950000: # continue image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)'%image_path) continue filename = os.path.splitext(os.path.split(image_path)[1])[0] #print(image_path) try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:,:,0:3] _paths = fimage.image_path.split('/') a,b,c = _paths[-3], _paths[-2], _paths[-1] target_dir = os.path.join(args.output_dir, a, b) if not os.path.exists(target_dir): os.makedirs(target_dir) target_file = os.path.join(target_dir, c) warped = None if fimage.landmark is not None: dst = fimage.landmark.astype(np.float32) tform = trans.SimilarityTransform() tform.estimate(dst, src[0:3,:]*1.5+image_size[0]*0.25) M = tform.params[0:2,:] warped0 = cv2.warpAffine(img,M,(image_size[1]*2,image_size[0]*2), borderValue = 0.0) _minsize = image_size[0] bounding_boxes, points = detect_face.detect_face(warped0, _minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0]>0: bindex = 0 det = bounding_boxes[bindex,0:4] #points need to be transpose, points = points.reshape( (5,2) ).transpose() dst = points[:, bindex].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(warped0,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[0]+=1 #assert fimage.bbox is not None if warped is None and fimage.bbox is not None: _minsize = img.shape[0]//4 bounding_boxes, points = detect_face.detect_face(img, 
_minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0]>0: det = bounding_boxes[:,0:4] bindex = -1 index2 = [0.0, 0] for i in range(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou>index2[0]: index2[0] = iou index2[1] = i if index2[0]>0.3: bindex = index2[1] if bindex>=0: dst = points[:, bindex].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[1]+=1 #print('1',target_file,index2[0]) if warped is None and fimage.bbox is not None: bb = fimage.bbox #croped = img[bb[1]:bb[3],bb[0]:bb[2],:] bounding_boxes, points = detect_face.detect_face_force(img, bb, pnet, rnet, onet) assert bounding_boxes.shape[0]==1 _box = bounding_boxes[0] if _box[4]>=0.3: dst = points[:, 0].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[2]+=1 #print('2',target_file) if warped is None: roi = np.zeros( (4,), dtype=np.int32) roi[0] = int(img.shape[1]*0.06) roi[1] = int(img.shape[0]*0.06) roi[2] = img.shape[1]-roi[0] roi[3] = img.shape[0]-roi[1] if fimage.bbox is not None: bb = fimage.bbox h = bb[3]-bb[1] w = bb[2]-bb[0] x = bb[0] y = bb[1] #roi = np.copy(bb) _w = int( (float(h)/image_size[0])*image_size[1] ) x += (w-_w)//2 #x = min( max(0,x), img.shape[1] ) x = max(0,x) xw = x+_w xw = min(xw, img.shape[1]) roi = np.array( (x, y, xw, y+h), dtype=np.int32) nrof[3]+=1 else: nrof[4]+=1 #print('3',bb,roi,img.shape) #print('3',target_file) warped = img[roi[1]:roi[3],roi[0]:roi[2],:] #print(warped.shape) warped = cv2.resize(warped, (image_size[1], image_size[0])) bgr = warped[...,::-1] cv2.imwrite(target_file, bgr) oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname)) text_file.write(oline)
import tensorflow as tf
from scipy import misc
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160
        # HumanNames = ['Andrew','Obama','ZiLin']   #train human name

        print('Loading feature extraction model')
        modeldir = './models/facenet/20190308-101738'
        facenet.load_model(modeldir)
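# After facenet.load_model(modeldir), the frozen graph exposes the input,
# embeddings and phase_train tensors by name, as the recognition snippets in this
# collection do. A minimal sketch of pulling them out of the default graph:
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]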
output_dir_path = os.getcwd() + '/datasets/mydata/aligned'
output_dir = os.path.expanduser(output_dir_path)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

datadir = os.getcwd() + '/datasets/mydata/raw'
dataset = facenet.get_dataset(datadir)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, os.getcwd() + '/align')

minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps' threshold
factor = 0.709  # scale factor
margin = 44
image_size = 182

# Add a random key to the filename to allow alignment using multiple processes
random_key = np.random.randint(0, high=99999)
bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

print('Goodluck')

with open(bounding_boxes_filename, "w") as text_file:
    nrof_images_total = 0
def main(args): #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = face_image.get_dataset('lfw', args.input_dir) print('dataset size', 'lfw', len(dataset)) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) # sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 threshold = [0.6, 0.7, 0.9] factor = 0.85 # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) #output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) output_filename = os.path.join(args.output_dir, 'lst') with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof = np.zeros((5, ), dtype=np.int32) for fimage in dataset: if nrof_images_total % 100 == 0: print("Processing %d, (%s)" % (nrof_images_total, nrof)) nrof_images_total += 1 #if nrof_images_total<950000: # continue image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)' % image_path) continue filename = os.path.splitext(os.path.split(image_path)[1])[0] #print(image_path) try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:, :, 0:3] _paths = fimage.image_path.split('/') a, b = _paths[-2], _paths[-1] target_dir = os.path.join(args.output_dir, a) if not os.path.exists(target_dir): os.makedirs(target_dir) target_file = os.path.join(target_dir, b) _minsize = minsize _bbox = None _landmark = None bounding_boxes, points = detect_face.detect_face( img, _minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * ( det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) bindex = np.argmax( bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering _bbox = bounding_boxes[bindex, 0:4] _landmark = points[:, bindex].reshape((2, 5)).T nrof[0] += 1 else: nrof[1] += 1 warped = face_preprocess.preprocess(img, bbox=_bbox, landmark=_landmark, image_size=args.image_size) bgr = warped[..., ::-1] #print(bgr.shape) cv2.imwrite(target_file, bgr) oline = '%d\t%s\t%d\n' % (1, target_file, int( fimage.classname)) text_file.write(oline)
def main(): global face_detected global save_pic global ii model_path = "models/20170511-185253.pb" # classifier_output_path = "/mnt/softwares/acv_project_code/Code/classifier_rf1_team.pkl" classifier_output_path = "models/classifier_rf4.pkl" #classifier_output_path = "/mnt/softwares/acv_project_code/Code/classfier_path/classifier_svm.pkl" with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name='') images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name( "phase_train:0") gpu_memory_fraction = 0.5 with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess1 = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) # sess1 = tf.Session(config=tf.ConfigProto(device_count = {'GPU': 0})) with sess1.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess1, None) model, class_names = pickle.load(open(classifier_output_path, 'rb'), encoding='latin1') cap_2 = cv2.VideoCapture( 'rtsp://*****:*****@192.168.10.111:554/' ) # cap = cv2.VideoCapture('/home/lokender/Downloads/orig_faces/videos/nayeem.mp4') #cap_2 = cv2.VideoCapture(0) fno = 0 det_name = [] det_prob = [] bbs = [] i = 0 while (~(cv2.waitKey(1) & 0xFF == ord('q'))): ret, image3 = cap_2.read() image4 = cv2.resize(image3, (600, 400)) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold #factor = 0.709 # scale factor orignal factor = 0.400 img = image4[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) print("-----------------------------------------------------") print(bounding_boxes) print("-----------------------------------------------------") nrof_faces = bounding_boxes.shape[0] print(nrof_faces) if nrof_faces == 1: top = bounding_boxes[0][1] right = bounding_boxes[0][0] bottom = bounding_boxes[0][3] left = bounding_boxes[0][2] print(top) print(right) print(bottom) print(left) cv2.rectangle(image4, (int(left + 60), int(top - 60)), (int(right - 60), int(bottom + 60)), (0, 0, 255), 2) crop_img = image4[int(top - 60):int(right + 60), int(bottom - 60):int(left + 60)] name_time = int(time.time()) print(name_time) if ii <= 5: ii += 1 else: ii = 0 name_time = str(name_time) + str(ii) path = os.path.join(folder_path, str(name_time) + '.jpg') print(path) gray_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2GRAY) cv2.imwrite(path, gray_img) #i = i+1 face_detected = True print("taking first imagae") #_" + str(i)) #save_pic = 0 #cv2.resize(image4,(600,400)) cv2.imshow('In Camera Live Feed', image4) fno = fno + 1 #cap.release() cap_2.release() cv2.destroyAllWindows()
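# Note on the box handling above: detect_face.detect_face returns each detection
# row as [x1, y1, x2, y2, score], i.e. left, top, right, bottom. A minimal
# self-contained sketch of drawing and cropping one detection with that ordering;
# the 60 px padding is hypothetical, echoing the margin used in the snippet above.
import cv2

def draw_and_crop(frame, box, pad=60):
    x1, y1, x2, y2 = [int(v) for v in box[0:4]]
    cv2.rectangle(frame, (x1 - pad, y1 - pad), (x2 + pad, y2 + pad), (0, 0, 255), 2)
    return frame[max(y1 - pad, 0):y2 + pad, max(x1 - pad, 0):x2 + pad]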
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

#datadir = '/..Path to human img data folder../'
#datadir = '/data0/krohitm/posture_dataset/scott_vid/realtime_deep_face/training'
datadir = '/data0/krohitm/posture_dataset/scott_vid/facenet_dataset/{0}/training'.format(parent_folder)
dataset = facenet.get_dataset(datadir)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, '/home/krohitm/code/facenet/src/align')
        #pnet, rnet, onet = detect_face.create_mtcnn(sess, './Path to det1.npy,..')

minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps' threshold
factor = 0.709  # scale factor
margin = 44
image_size = 182

# Add a random key to the filename to allow alignment using multiple processes
random_key = np.random.randint(0, high=99999)
bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

print('Goodluck')

with open(bounding_boxes_filename, "w") as text_file:
def loadFromNPY():
    print('Creating networks and loading parameters')
    sess = tf.Session()
    return detect_face.create_mtcnn(sess, None)
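# A minimal usage sketch for loadFromNPY above; the image path is hypothetical and
# the imports mirror the other snippets in this collection.
from scipy import misc
import detect_face

pnet, rnet, onet = loadFromNPY()
img = misc.imread('test.jpg')
bounding_boxes, points = detect_face.detect_face(img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
print('faces found:', bounding_boxes.shape[0])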
output_dir_path = 'human_data/align'
output_dir = os.path.expanduser(output_dir_path)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

datadir = 'human_data/unalign'
dataset = facenet.get_dataset(datadir)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, 'data')

minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps' threshold
factor = 0.709  # scale factor
margin = 44
image_size = 182

# Add a random key to the filename to allow alignment using multiple processes
random_key = np.random.randint(0, high=99999)
bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

print('Goodluck')

with open(bounding_boxes_filename, "w") as text_file:
    nrof_images_total = 0
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) # facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = face_image.get_dataset(args.name, args.input_dir) print('dataset size', args.name, len(dataset)) print('Creating networks and loading parameters') with tf.Graph().as_default(): # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor if args.name == 'lfw' or args.name == 'webface' or args.name == 'vgg': minsize = 20 threshold = [0.6, 0.7, 0.9] factor = 0.85 if args.name == 'ytf': minsize = 20 threshold = [0.6, 0.7, 0.4] factor = 0.85 print(minsize) print(threshold) print(factor) # Add a random key to the filename to allow alignment using multiple processes # random_key = np.random.randint(0, high=99999) # bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 nrof_changed = 0 nrof_iou3 = 0 nrof_force = 0 for fimage in dataset: if nrof_images_total % 100 == 0: print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned)) nrof_images_total += 1 image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)' % image_path) continue filename = os.path.splitext(os.path.split(image_path)[1])[0] # print(image_path) try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s", img dim error' % image_path) # text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:, :, 0:3] _minsize = minsize if fimage.bbox is not None: _bb = fimage.bbox _minsize = min([ _bb[2] - _bb[0], _bb[3] - _bb[1], img.shape[0] // 2, img.shape[1] // 2 ]) bounding_boxes, points = detect_face.detect_face( img, _minsize, pnet, rnet, onet, threshold, factor) bindex = -1 nrof_faces = bounding_boxes.shape[0] if fimage.bbox is None and nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * ( det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) bindex = np.argmax( bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering if fimage.bbox is not None: if nrof_faces > 0: assert (bounding_boxes.shape[0] == points.shape[1]) det = bounding_boxes[:, 0:4] img_size = np.asarray(img.shape)[0:2] index2 = [0.0, 0] for i in xrange(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou > index2[0]: index2[0] = iou index2[1] = i if index2[0] > -0.3: bindex = index2[1] nrof_iou3 += 1 if bindex < 0: bounding_boxes, points = 
detect_face.detect_face_force( img, fimage.bbox, pnet, rnet, onet) bindex = 0 nrof_force += 1 # if bindex<0: # _img = img[fimage.bbox[1]:fimage.bbox[3], fimage.bbox[0]:fimage.bbox[2],:] # woffset = fimage.bbox[0] # hoffset = fimage.bbox[1] # _minsize = min( [_img.shape[0]//3, _img.shape[1]//3] ) # bounding_boxes, points = detect_face.detect_face(_img, _minsize, pnet, rnet, onet, [0.6,0.7,0.01], factor) # nrof_faces = bounding_boxes.shape[0] # print(nrof_faces) # if nrof_faces>0: # #print(points.shape) # #assert(nrof_faces>0) # bounding_boxes[:,0]+=woffset # bounding_boxes[:,2]+=woffset # bounding_boxes[:,1]+=hoffset # bounding_boxes[:,3]+=hoffset # points[0:5,:] += woffset # points[5:10,:] += hoffset # bindex = 0 # score = bounding_boxes[bindex,4] # print(score) # if score<=0.0: # bindex = -1 # else: # nrof_force+=1 # if bindex<0: # _bb = fimage.bbox # _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] ) # bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.1], factor) # nrof_faces = bounding_boxes.shape[0] # print(nrof_faces) # if nrof_faces>0: # bindex = 0 # if fimage.bbox is not None and bounding_boxes.shape[0]==0: # bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.3], factor) # print(bounding_boxes.shape, points.shape) # print(nrof_faces, points.shape) if bindex >= 0: det = bounding_boxes[:, 0:4] det = det[bindex, :] points = points[:, bindex] # points need to be transpose, points = points.reshape( (5,2) ).transpose() det = np.squeeze(det) # bb = np.zeros(4, dtype=np.int32) # bb[0] = np.maximum(det[0]-args.margin/2, 0) # bb[1] = np.maximum(det[1]-args.margin/2, 0) # bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) # bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) bb = det # print(points.shape) points = list(points.flatten()) assert (len(points) == 10) # cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] # scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') # misc.imsave(output_filename, scaled) nrof_successfully_aligned += 1 oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % ( 0, fimage.image_path, int( fimage.classname), bb[0], bb[1], bb[2], bb[3]) oline += '\t'.join([str(x) for x in points]) text_file.write("%s\n" % oline) else: print('Unable to align "%s", no face detected' % image_path) if args.force > 0: if fimage.bbox is None: oline = '%d\t%s\t%d\n' % (0, fimage.image_path, int(fimage.classname)) else: bb = fimage.bbox oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\n' % ( 0, fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3]) text_file.write(oline) # text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of changed: %d' % nrof_changed) print('Number of iou3: %d' % nrof_iou3) print('Number of force: %d' % nrof_force)
    ratio = Area * 1. / (Area1 + Area2 - Area)
    return ratio


parser = argparse.ArgumentParser(description='Package CFP images')
# general
parser.add_argument('--data-dir', default='', help='')
parser.add_argument('--image-size', type=str, default='112,96', help='')
parser.add_argument('--output', default='./', help='path to save.')
args = parser.parse_args()

with tf.Graph().as_default():
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    sess = tf.Session()
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

minsize = 20
threshold = [0.6, 0.7, 0.9]
factor = 0.85
#minsize = 15
threshold = [0.6, 0.7, 0.7]
factor = 0.9
#factor = 0.7

for part in [('CFP_frontal_paris.mat', 'cfp_ff'), ('CFP_profile_paris.mat', 'cfp_fp')]:
    mat_file = os.path.join(args.data_dir, part[0])
    mat_data = loadmat(mat_file)
    pairs = mat_data['pairs']
    bins = []
    issame_list = []
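# The `ratio = Area * 1. / (Area1 + Area2 - Area)` line above is the tail of an
# intersection-over-union helper whose body is not shown in this snippet. A
# minimal self-contained sketch of such a function follows; the (x1, y1, x2, y2)
# box layout is an assumption based on how boxes are used elsewhere in these snippets.
def IOU(box1, box2):
    # intersection rectangle
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[2], box2[2])
    yB = min(box1[3], box2[3])
    Area = max(0., xB - xA) * max(0., yB - yA)
    # individual box areas
    Area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    Area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    ratio = Area * 1. / (Area1 + Area2 - Area)
    return ratio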
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) datamap = {} pp = 0 datasize = 0 verr = 0 for line in open(args.input_dir+"_clean_list.txt", 'r'): pp+=1 if pp%10000==0: print('loading list', pp) line = line.strip()[2:] if not line.startswith('m.'): continue vec = line.split('/') assert len(vec)==2 #print(line) person = vec[0] img = vec[1] try: img_id = int(img.split('.')[0]) except ValueError: #print('value error', line) verr+=1 continue if not person in datamap: labelid = len(datamap) datamap[person] = [labelid, {img_id : 1}] else: datamap[person][1][img_id] = 1 datasize+=1 print('dataset size', args.name, datasize) print('dataset err', verr) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face #threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold threshold = [ 0.6, 0.6, 0.3 ] # three steps's threshold factor = 0.709 # scale factor print(minsize) print(threshold) print(factor) # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 nrof_changed = 0 nrof_iou3 = 0 nrof_force = 0 for line in open(args.input_dir, 'r'): vec = line.strip().split() person = vec[0] img_id = int(vec[1]) v = datamap.get(person, None) if v is None: continue #TODO #if not img_id in v[1]: # continue labelid = v[0] img_str = base64.b64decode(vec[-1]) nparr = np.fromstring(img_str, np.uint8) img = cv2.imdecode(nparr, cv2.CV_LOAD_IMAGE_COLOR) img = img[...,::-1] #to rgb if nrof_images_total%100==0: print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned)) nrof_images_total += 1 target_dir = os.path.join(output_dir, person) if not os.path.exists(target_dir): os.makedirs(target_dir) target_path = os.path.join(target_dir, "%d.jpg"%img_id) _minsize = minsize fimage = edict() fimage.bbox = None fimage.image_path = target_path fimage.classname = str(labelid) if fimage.bbox is not None: _bb = fimage.bbox _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] ) else: _minsize = min(img.shape[0]//5, img.shape[1]//5) bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) bindex = -1 nrof_faces = bounding_boxes.shape[0] if fimage.bbox is None and nrof_faces>0: det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering if fimage.bbox is not None: if nrof_faces>0: assert(bounding_boxes.shape[0]==points.shape[1]) det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] index2 = [0.0, 0] for i in range(det.shape[0]): 
_det = det[i] iou = IOU(fimage.bbox, _det) if iou>index2[0]: index2[0] = iou index2[1] = i if index2[0]>-0.3: bindex = index2[1] nrof_iou3+=1 if bindex<0: bounding_boxes, points = detect_face.detect_face_force(img, fimage.bbox, pnet, rnet, onet) bindex = 0 nrof_force+=1 if bindex>=0: det = bounding_boxes[:,0:4] det = det[bindex,:] points = points[:, bindex] landmark = points.reshape((2,5)).T #points need to be transpose, points = points.reshape( (5,2) ).transpose() det = np.squeeze(det) bb = det points = list(points.flatten()) assert(len(points)==10) warped = face_preprocess.preprocess(img, bbox=bb, landmark = landmark, image_size=args.image_size) misc.imsave(target_path, warped) nrof_successfully_aligned += 1 oline = '%d\t%s\t%d' % (1,fimage.image_path, int(fimage.classname)) #oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3]) #oline += '\t'.join([str(x) for x in points]) text_file.write("%s\n"%oline) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of changed: %d' % nrof_changed) print('Number of iou3: %d' % nrof_iou3) print('Number of force: %d' % nrof_force)
def main(args): #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = face_image.get_dataset('lfw', args.input_dir) print('dataset size', 'lfw', len(dataset)) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 threshold = [0.6,0.7,0.9] factor = 0.85 # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) #output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) output_filename = os.path.join(args.output_dir, 'lst') with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof = np.zeros( (5,), dtype=np.int32) for fimage in dataset: if nrof_images_total%100==0: print("Processing %d, (%s)" % (nrof_images_total, nrof)) nrof_images_total += 1 #if nrof_images_total<950000: # continue image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)'%image_path) continue filename = os.path.splitext(os.path.split(image_path)[1])[0] #print(image_path) try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:,:,0:3] _paths = fimage.image_path.split('/') a,b = _paths[-2], _paths[-1] target_dir = os.path.join(args.output_dir, a) if not os.path.exists(target_dir): os.makedirs(target_dir) target_file = os.path.join(target_dir, b) _minsize = minsize _bbox = None _landmark = None bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering _bbox = bounding_boxes[bindex, 0:4] _landmark = points[:, bindex].reshape( (2,5) ).T nrof[0]+=1 else: nrof[1]+=1 warped = face_preprocess.preprocess(img, bbox=_bbox, landmark = _landmark, image_size=args.image_size) bgr = warped[...,::-1] #print(bgr.shape) cv2.imwrite(target_file, bgr) oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname)) text_file.write(oline)
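# The nrof_faces > 1 branch above keeps the face that is both large and close to
# the image centre: it maximises bbox_area - 2 * squared_distance_to_centre. A
# minimal standalone sketch of that heuristic; the function name and inputs are
# hypothetical, the arithmetic mirrors the snippet above.
import numpy as np

def pick_center_face(det, img_size):
    # det: (N, 4) array of [x1, y1, x2, y2]; img_size: (height, width)
    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
    img_center = np.asarray(img_size) / 2
    offsets = np.vstack([
        (det[:, 0] + det[:, 2]) / 2 - img_center[1],   # horizontal offset from centre
        (det[:, 1] + det[:, 3]) / 2 - img_center[0],   # vertical offset from centre
    ])
    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
    # some extra weight on the centering
    return np.argmax(bounding_box_size - offset_dist_squared * 2.0)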
factor = 0.709  # scale factor
gpu_memory_fraction = 1.0  # fraction of GPU memory to hand to this process

print('Creating networks and loading parameters')
# Create the session and configure its parameters.
with tf.Graph().as_default():
    # per_process_gpu_memory_fraction caps the GPU memory used by this process,
    # but the cap applies uniformly to all GPUs; it cannot be set per GPU.
    # 1.0 means each GPU hands its full capacity to the process.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
    # Use GPU memory by default and do not print the device placement log.
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        # Build the MTCNN networks.
        pnet, rnet, onet = detect_face.create_mtcnn(sess, 'detection_model')

        image_face_dir = './static/regist_image/'
        image_face_paths = [os.path.join(image_face_dir, i) for i in os.listdir(image_face_dir)]
        image_index = 0
        for image_face in image_face_paths:
            image_index += 1
            # Read the image.
            img = misc.imread(image_face)
            # image_path = r'C:\Users\T470P\Desktop\test\faces.png'
            # Detect faces; bounding_boxes holds the detection matrix.
            bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
# output_dir_path = '/..Path to output folder../'
output_dir_path = 'output/'
output_dir = os.path.expanduser(output_dir_path)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

datadir = 'face_folder/'
dataset = facenet.get_dataset(datadir)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align/')

minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps' threshold
factor = 0.709  # scale factor
margin = 44
image_size = 182

# Add a random key to the filename to allow alignment using multiple processes
random_key = np.random.randint(0, high=99999)
bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

print('Goodluck')

with open(bounding_boxes_filename, "w") as text_file:
    nrof_images_total = 0
def collect_data(self):
    output_dir = os.path.expanduser(self.output_datadir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = facenet.get_dataset(self.input_datadir)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy')

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 182

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print("Image: %s" % image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                            print('to_rgb data dimension: ', img.ndim)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Number of detected faces: %d' % nrof_faces)
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                img_center = img_size / 2
                                offsets = np.vstack([
                                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                                ])
                                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                # some extra weight on the centering
                                index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
                                det = det[index, :]

                            det = np.squeeze(det)
                            bb_temp = np.zeros(4, dtype=np.int32)
                            bb_temp[0] = det[0]
                            bb_temp[1] = det[1]
                            bb_temp[2] = det[2]
                            bb_temp[3] = det[3]

                            cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :]
                            scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear')

                            nrof_successfully_aligned += 1
                            misc.imsave(output_filename, scaled_temp)
                            text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    return (nrof_images_total, nrof_successfully_aligned)
def identify_face(q: Queue):
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps' threshold
            factor = 0.709  # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160

            HumanNames = os.listdir(train_img)
            HumanNames.sort()

            print('Loading Model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile, encoding='iso-8859-1')
            print('Loaded model')

            c = 0
            while True:
                while q.empty():
                    time.sleep(0.01)
                    continue
                file_name = q.get()
                file_path = os.path.join('result', file_name)
                video_capture = cv2.VideoCapture(file_path)
                print('Start recognition on: {}'.format(file_name))
                while video_capture.isOpened():
                    ret, frame = video_capture.read()
                    if not ret:
                        break
                    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
                    timeF = frame_interval
                    if (c % timeF == 0):
                        find_results = []
                        if frame.ndim == 2:
                            frame = facenet.to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Detected_FaceNum: %d' % nrof_faces)
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            img_size = np.asarray(frame.shape)[0:2]
                            cropped = []
                            scaled = []
                            scaled_reshape = []
                            bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                            for i in range(nrof_faces):
                                emb_array = np.zeros((1, embedding_size))
                                bb[i][0] = det[i][0]
                                bb[i][1] = det[i][1]
                                bb[i][2] = det[i][2]
                                bb[i][3] = det[i][3]
                                # inner exception
                                if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                    print('Face is very close!')
                                    continue
                                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                                cropped[i] = facenet.flip(cropped[i], False)
                                scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear'))
                                scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                                scaled[i] = facenet.prewhiten(scaled[i])
                                scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                                feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                                # print("predictions")
                                print(best_class_indices, ' with accuracy ', best_class_probabilities)
                                # print(best_class_probabilities)
                                if best_class_probabilities > 0.53:
                                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # boxing face
                                    # plot result idx under box
                                    text_x = bb[i][0]
                                    text_y = bb[i][3] + 20
                                    print('Result Indices: ', best_class_indices[0])
                                    print(HumanNames)
                                    for H_i in HumanNames:
                                        if HumanNames[best_class_indices[0]] == H_i:
                                            result_names = HumanNames[best_class_indices[0]]
                                            cv2.putText(frame, result_names, (text_x, text_y),
                                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                                        thickness=1, lineType=2)
                        else:
                            print('Alignment Failure')
                    c += 1
                    cv2.imshow('Video_2', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                video_capture.release()
                # cv2.destroyAllWindows()
                q.task_done()
            cv2.destroyAllWindows()
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = face_image.get_dataset(args.name, args.input_dir) print('dataset size', args.name, len(dataset)) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor if args.name=='lfw' or args.name=='webface' or args.name=='vgg': minsize = 20 threshold = [0.6,0.7,0.9] factor = 0.85 print(minsize) print(threshold) print(factor) # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 nrof_changed = 0 nrof_iou3 = 0 nrof_force = 0 for fimage in dataset: if nrof_images_total%100==0: print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned)) nrof_images_total += 1 image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)'%image_path) continue filename = os.path.splitext(os.path.split(image_path)[1])[0] #print(image_path) try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:,:,0:3] _minsize = minsize if fimage.bbox is not None: _bb = fimage.bbox _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] ) bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) bindex = -1 nrof_faces = bounding_boxes.shape[0] if fimage.bbox is None and nrof_faces>0: det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering if fimage.bbox is not None: if nrof_faces>0: assert(bounding_boxes.shape[0]==points.shape[1]) det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] index2 = [0.0, 0] for i in xrange(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou>index2[0]: index2[0] = iou index2[1] = i if index2[0]>-0.3: bindex = index2[1] nrof_iou3+=1 if bindex<0: bounding_boxes, points = detect_face.detect_face_force(img, fimage.bbox, pnet, rnet, onet) bindex = 0 nrof_force+=1 #if bindex<0: # _img = img[fimage.bbox[1]:fimage.bbox[3], fimage.bbox[0]:fimage.bbox[2],:] # woffset 
= fimage.bbox[0] # hoffset = fimage.bbox[1] # _minsize = min( [_img.shape[0]//3, _img.shape[1]//3] ) # bounding_boxes, points = detect_face.detect_face(_img, _minsize, pnet, rnet, onet, [0.6,0.7,0.01], factor) # nrof_faces = bounding_boxes.shape[0] # print(nrof_faces) # if nrof_faces>0: # #print(points.shape) # #assert(nrof_faces>0) # bounding_boxes[:,0]+=woffset # bounding_boxes[:,2]+=woffset # bounding_boxes[:,1]+=hoffset # bounding_boxes[:,3]+=hoffset # points[0:5,:] += woffset # points[5:10,:] += hoffset # bindex = 0 # score = bounding_boxes[bindex,4] # print(score) # if score<=0.0: # bindex = -1 # else: # nrof_force+=1 #if bindex<0: # _bb = fimage.bbox # _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] ) # bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.1], factor) # nrof_faces = bounding_boxes.shape[0] # print(nrof_faces) # if nrof_faces>0: # bindex = 0 #if fimage.bbox is not None and bounding_boxes.shape[0]==0: # bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.3], factor) #print(bounding_boxes.shape, points.shape) #print(nrof_faces, points.shape) if bindex>=0: det = bounding_boxes[:,0:4] det = det[bindex,:] points = points[:, bindex] #points need to be transpose, points = points.reshape( (5,2) ).transpose() det = np.squeeze(det) #bb = np.zeros(4, dtype=np.int32) #bb[0] = np.maximum(det[0]-args.margin/2, 0) #bb[1] = np.maximum(det[1]-args.margin/2, 0) #bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) #bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) bb = det #print(points.shape) points = list(points.flatten()) assert(len(points)==10) #cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] #scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') #misc.imsave(output_filename, scaled) nrof_successfully_aligned += 1 oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3]) oline += '\t'.join([str(x) for x in points]) text_file.write("%s\n"%oline) else: print('Unable to align "%s", no face detected' % image_path) if args.force>0: if fimage.bbox is None: oline = '%d\t%s\t%d\n' % (0,fimage.image_path, int(fimage.classname)) else: bb = fimage.bbox oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\n' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3]) text_file.write(oline) #text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of changed: %d' % nrof_changed) print('Number of iou3: %d' % nrof_iou3) print('Number of force: %d' % nrof_force)
def mtcnn_cut(fin, fout):
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor
    margin = 44
    frame_interval = 3
    batch_size = 1000
    image_size = 182
    input_image_size = 160

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, r'.\20170512-110547')

            for file in os.listdir(fin):
                try:
                    file_fullname = fin + '/' + file
                    img = misc.imread(file_fullname)
                    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]  # number of detected faces
                    print('Number of faces found: {}'.format(nrof_faces))
                    # print(bounding_boxes)
                    crop_faces = []
                    if nrof_faces != 0:
                        for face_position in bounding_boxes:
                            face_position = face_position.astype(int)
                            print(face_position[0:4])
                            cv2.rectangle(img, (face_position[0], face_position[1]),
                                          (face_position[2], face_position[3]), (0, 255, 0), 2)
                            crop = img[face_position[1]:face_position[3], face_position[0]:face_position[2], ]
                            # crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC)
                            crop_faces.append(crop)
                            img2 = Image.open(file_fullname)
                            box = face_position[0:4]
                            roi = img2.crop(box)
                            resized = roi.resize((250, 250))
                            out_path = fout + '/' + file
                            resized.save(out_path)
                            print('success')
                except Exception as e:
                    print('failed on {}: {}'.format(file, e))
def captureAndIdentify(cwd, relative_path): modeldir = relative_path + '/model/20170511-185253.pb' classifier_filename = relative_path + '/class/classifier.pkl' npy = relative_path + '/npy' train_img = relative_path + '/train_img' WINDOW_TITLE = "Take photo using SPACE to continue with the process." os.chdir(cwd + '/FaceRecognition/') if not hasattr(sys, 'argv'): sys.argv = [''] sys.path.append('.') import facenet import detect_face import tensorflow as tf cam = cv2.VideoCapture(0) cv2.namedWindow(WINDOW_TITLE) img_name = '' result_names = '' while cv2.getWindowProperty(WINDOW_TITLE, 0) >= 0: ret, frame = cam.read() cv2.imshow(WINDOW_TITLE, frame) if not ret: break k = cv2.waitKey(1) if k % 256 == 32: # SPACE pressed img_name = "capture.png" cv2.imwrite(os.getcwd() + "/" + img_name, frame) print("{} written!".format(img_name)) break if k % 256 == 27: # ESC pressed print("Escape hit, closing...") break cam.release() cv2.destroyAllWindows() if img_name == '': return 'Error: Did not capture anything. Press SPACE to capture a photo.' with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn( sess, os.path.expanduser(npy)) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 frame_interval = 3 batch_size = 1000 image_size = 182 input_image_size = 160 HumanNames = os.listdir(os.path.expanduser(train_img)) HumanNames.sort() print('Loading feature extraction model') facenet.load_model(modeldir) images_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") phase_train_placeholder = tf.get_default_graph( ).get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] classifier_filename_exp = os.path.expanduser(classifier_filename) with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile, encoding='latin1') # video_capture = cv2.VideoCapture("akshay_mov.mp4") c = 0 print('Start Recognition!') prevTime = 0 # ret, frame = video_capture.read() frame = cv2.imread(img_name, 0) os.remove('capture.png') # clean up # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) curTime = time.time() + 1 # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( frame, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('Faces Detected: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(frame.shape)[0:2] cropped = [] scaled = [] scaled_reshape = [] bb = np.zeros((nrof_faces, 4), dtype=np.int32) for i in range(nrof_faces): emb_array = np.zeros((1, embedding_size)) bb[i][0] = det[i][0] bb[i][1] = det[i][1] bb[i][2] = det[i][2] bb[i][3] = det[i][3] # inner exception if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len( frame[0]) or bb[i][3] >= len(frame): print('face is too close') continue cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) cropped[i] = facenet.flip(cropped[i], False) scaled.append( misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) scaled[i] = cv2.resize( scaled[i], (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC) 
scaled[i] = facenet.prewhiten(scaled[i]) scaled_reshape.append(scaled[i].reshape( -1, input_image_size, input_image_size, 3)) feed_dict = { images_placeholder: scaled_reshape[i], phase_train_placeholder: False } emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) predictions = model.predict_proba(emb_array) print(predictions) best_class_indices = np.argmax(predictions, axis=1) # print(best_class_indices) best_class_probabilities = predictions[ np.arange(len(best_class_indices)), best_class_indices] print('Best class indicies: ', best_class_indices) print('Best class probabilites: ', best_class_probabilities) if len([ x for x in predictions[0].tolist() if x >= 0.8 ]) == 0: print('No Valid Faces') return 'Error: No valid faces detected. Will not continue with the process.' else: print('Here') cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face #plot result idx under box text_x = bb[i][0] text_y = bb[i][3] + 20 print('Result Indices: ', best_class_indices[0]) print('Human Names: ', HumanNames) for H_i in HumanNames: print('Human at index: ', H_i) if HumanNames[best_class_indices[0]] == H_i: result_names = HumanNames[ best_class_indices[0]] cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), thickness=1, lineType=2) else: return 'Error: No faces detected. Will not continue with the process.' cv2.imshow('Valid faces detected. Close window to proceeed.', frame) while cv2.getWindowProperty( 'Valid faces detected. Close window to proceeed.', 0) >= 0: k = cv2.waitKey(1) if k % 256 == 27: # ESC pressed print("Escape hit, closing...") break cv2.destroyAllWindows() return result_names
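# facenet.prewhiten (used just above) normalizes each crop before it is fed to the embedding
# network. A sketch of the per-image standardization it typically performs (assuming the stock
# facenet helper; verify against the facenet copy actually bundled with this project):
import numpy as np

def prewhiten(x):
    # zero-mean, unit-variance normalization of a single image,
    # with the std clamped so flat images do not divide by ~0
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    return (x - mean) / std_adj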
def open_dialog(self): filename = askopenfilename() print(filename) if ".jpg" in filename: print("OK") img_path = filename modeldir = './model/20170511-185253.pb' classifier_filename = './class/classifier.pkl' npy = './npy' train_img = "./train_img" with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=0.6) sess = tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, npy) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 44 frame_interval = 3 batch_size = 1000 image_size = 182 input_image_size = 160 HumanNames = os.listdir(train_img) HumanNames.sort() print('Loading feature extraction model') facenet.load_model(modeldir) images_placeholder = tf.get_default_graph( ).get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") phase_train_placeholder = tf.get_default_graph( ).get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] classifier_filename_exp = os.path.expanduser( classifier_filename) with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) #video_capture = cv2.VideoCapture("akshay_mov.mp4") c = 0 print('Start Recognition!') prevTime = 0 # ret, frame = video_capture.read() frame = cv2.imread(img_path, 0) frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) # resize frame (optional) curTime = time.time() + 1 # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( frame, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('Face Detected: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(frame.shape)[0:2] cropped = [] scaled = [] scaled_reshape = [] bb = np.zeros((nrof_faces, 4), dtype=np.int32) for i in range(nrof_faces): emb_array = np.zeros((1, embedding_size)) bb[i][0] = det[i][0] bb[i][1] = det[i][1] bb[i][2] = det[i][2] bb[i][3] = det[i][3] # inner exception if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][ 2] >= len(frame[0]) or bb[i][3] >= len( frame): print('face is too close') continue cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) cropped[i] = facenet.flip(cropped[i], False) scaled.append( misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) scaled[i] = cv2.resize( scaled[i], (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC) scaled[i] = facenet.prewhiten(scaled[i]) scaled_reshape.append(scaled[i].reshape( -1, input_image_size, input_image_size, 3)) feed_dict = { images_placeholder: scaled_reshape[i], phase_train_placeholder: False } emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) predictions = model.predict_proba(emb_array) #print(predictions) best_class_indices = np.argmax(predictions, axis=1) #print(best_class_indices) best_class_probabilities = predictions[ np.arange(len(best_class_indices)), best_class_indices] #print(best_class_probabilities) cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) # boxing face # plot result idx under box text_x = bb[i][0] text_y = bb[i][3] + 20 print('Result Indices: ', best_class_indices[0]) print(HumanNames) for H_i in HumanNames: print(H_i) if HumanNames[ best_class_indices[0]] == H_i: result_names = HumanNames[ 
best_class_indices[0]] cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), thickness=1, lineType=2) print("RES NAME = " + result_names) #global result #globalresult = result result = result_names print(type(result.split('_'))) str = result.split('_') name_str = str[0] id_str = str[1] id_int = int(id_str) print(id_int) with open(r"schooldata.csv", "a", encoding='utf-8') as file: a_pen = csv.writer(file) now = datetime.now() auth_time = now.strftime("%H:%M") a_pen.writerow( (name_str + "", id_int, auth_time, "True")) file.close() else: print('Unable to align') cv2.imshow('Image', frame) if cv2.waitKey(1000000) & 0xFF == ord('q'): sys.exit("Thanks") #cv2.destroyAllWindows() #os.system("find /home/opencv/PycharmProjects/gui2/bb.jpg") else: mb.showerror("Input Error", "Image must be .jpg file")
dirName = "./newDataset/" + resultName
if not os.path.exists(dirName):
    os.mkdir(dirName)
resultName = resultName + '.jpg'
cv2.imwrite(os.path.join(dirName, resultName), frame)
print(" New face added into the directory path: ", dirName)
# cv2.imwrite(resultName, frame)

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')

        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        # HumanNames = ['Andrew','Obama','ZiLin']  # train human name
        print('Loading feature extraction model')
        # modeldir = './All_trained_models/models-02/20170512-110547-02'  # pre-trained model
        modeldir = './models/20170512-110547'  # pre-trained model
def main(): global face_detected global save_pic global ii model_path = "models/20170511-185253.pb" # classifier_output_path = "/mnt/softwares/acv_project_code/Code/classifier_rf1_team.pkl" classifier_output_path = "models/classifier_rf4.pkl" #classifier_output_path = "/mnt/softwares/acv_project_code/Code/classfier_path/classifier_svm.pkl" with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name='') images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name( "phase_train:0") gpu_memory_fraction = 0.5 with tf.Graph().as_default(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess1 = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) # sess1 = tf.Session(config=tf.ConfigProto(device_count = {'GPU': 0})) with sess1.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess1, None) model, class_names = pickle.load(open(classifier_output_path, 'rb'), encoding='latin1') #cap = cv2.VideoCapture(0) #cap = cv2.VideoCapture("rtsp://*****:*****@10.194.2.141:554/"); #rtsp://admin:[email protected]:554/cam/realmonitor?channel=1&subtype=1 #cap_2 = cv2.VideoCapture("rtsp://*****:*****@10.194.2.51:554/") cap_2 = cv2.VideoCapture( "rtsp://*****:*****@[email protected]:554/cam/realmonitor?channel=1&subtype=0" ) # cap = cv2.VideoCapture('/home/lokender/Downloads/orig_faces/videos/nayeem.mp4') # cap = cv2.VideoCapture('/home/lokender/Downloads/orig_faces/videos/lokender.mp4') fno = 0 det_name = [] det_prob = [] bbs = [] i = 0 while (~(cv2.waitKey(1) & 0xFF == ord('q'))): #print(time.strftime("%H:%M:%S")) ''' # image2 = cv2.imread("/home/lokender/Downloads/T1/both/IMG_20171115_150720.jpg") # image2.set_shape((480, 640, 3)) # image2= cv2.resize(image2, (640,480)) ret, image1 = cap.read() image2 = cv2.resize(image1, (320, 240)) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor img = image2[:, :, 0:3] print('it - 1') bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) print('it - 2') nrof_faces = bounding_boxes.shape[0] print(nrof_faces) if nrof_faces == 1: path = os.path.join(os.getcwd(), folder, str(i)+'.jpg') cv2.imwrite(path, image1) i = i+1 face_detected = True print("taking first imagae_" + str(i)) cv2.imshow('fr', image2) fno = fno + 1 ''' # image2 = cv2.imread("/home/lokender/Downloads/T1/both/IMG_20171115_150720.jpg") # image2.set_shape((480, 640, 3)) # image2= cv2.resize(image2, (640,480)) ret, image3 = cap_2.read() image4 = cv2.resize(image3, (600, 400)) minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor img = image4[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] #print(nrof_faces) if nrof_faces == 1: save_pic = save_pic + 1 print("save_pic: " + str(save_pic)) if save_pic == 50: #path = os.path.join(os.getcwd(), folder_2, str(i)+'.jpg') name_time = int(time.time()) print(name_time) if ii <= 5: ii += 1 else: ii = 0 name_time = str(name_time) + str(ii) path = os.path.join(folder_path, str(name_time) + '.jpg') print(path) cv2.imwrite(path, image3) #i = i+1 face_detected = True print("taking first imagae") #_" + 
str(i)) save_pic = 0 #cv2.resize(image4,(600,400)) cv2.imshow('Room Camera Live Feed', image4) fno = fno + 1 #cap.release() cap_2.release() cv2.destroyAllWindows()
def main(): model_path = "models/20170511-185253.pb" # classifier_output_path = "/mnt/softwares/acv_project_code/Code/classifier_rf1_team.pkl" classifier_output_path = "models/classifier_rf4.pkl" #classifier_output_path = "/mnt/softwares/acv_project_code/Code/classfier_path/classifier_svm.pkl" with gfile.FastGFile(model_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name='') images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") gpu_memory_fraction = 1 with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) sess1 = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) # sess1 = tf.Session(config=tf.ConfigProto(device_count = {'GPU': 0})) with sess1.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess1, None) model, class_names = pickle.load(open(classifier_output_path, 'rb'), encoding='latin1') cap = cv2.VideoCapture(0) # cap = cv2.VideoCapture('/home/lokender/Downloads/orig_faces/videos/nayeem.mp4') # cap = cv2.VideoCapture('/home/lokender/Downloads/orig_faces/videos/lokender.mp4') fno = 0 det_name = [] det_prob =[] bbs = [] while (~(cv2.waitKey(1) & 0xFF == ord('q'))): # image2 = cv2.imread("/home/lokender/Downloads/T1/both/IMG_20171115_150720.jpg") # image2.set_shape((480, 640, 3)) # image2= cv2.resize(image2, (640,480)) ret, image2 = cap.read() image2 = cv2.resize(image2, (320, 240)) if fno % 5 == 0: # image2 = cv2.imread("/home/lokender/Downloads/T1/both/IMG_20171115_150720.jpg") # image2.set_shape((480, 640, 3)) # image2= cv2.resize(image2, (640,480)) # image2 = cv2.imread("/home/lokender/Downloads/T1/both/IMG_20171115_150720.jpg") # image2.set_shape((480, 640, 3)) # image2= cv2.resize(image2, (640,480)) print(fno) # image2=rotate_bound(image1,90) # image2 = cv2.imread('/home/lokender/Downloads/acv_tmp/tm_al/tmp/frame_0.png', cv2.IMREAD_COLOR) # cv2.imwrite("/home/lokender/Downloads/acv_tmp/tm/tmp/frame.png", image2) image_size = 160 margin = 32 detect_multiple_faces = True minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor img = image2[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print(nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: if detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack( [(det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) index = np.argmax( bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering det_arr.append(det[index, :]) else: det_arr.append(np.squeeze(det)) det_name = [] det_prob =[] bbs = [] for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - margin / 2, 0) bb[1] = np.maximum(det[1] - margin / 2, 0) bb[2] = np.minimum(det[2] + margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], 
:] bbs.append(bb) scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear') # nrof_successfully_aligned += 1 # output_filename_n = "{}_{}.{}".format(output_filename.split('.')[0], i, # output_filename.split('.')[-1]) # misc.imsave(output_filename_n, scaled) # config=tf.ConfigProto(device_count = {'GPU': 0}) with tf.Session(config=tf.ConfigProto(gpu_options=(tf.GPUOptions(per_process_gpu_memory_fraction=1)))) as sess: image_paths = ['/home/nayeem/Desktop/acv_live_face_recognition_project/src/images/frame_0.png'] image_size = 160 batch_size = 1 num_threads = 1 num_epochs = 1 label_list = [0] images = ops.convert_to_tensor(image_paths, dtype=tf.string) labels = ops.convert_to_tensor(label_list, dtype=tf.int32) # Makes an input queue input_queue = tf.train.slice_input_producer((images, labels), num_epochs=num_epochs, shuffle=False, ) images_labels = [] image = tf.convert_to_tensor(scaled) label = input_queue[1] # image = tf.random_crop(image, size=[image_size, image_size, 3]) # image.set_shape((image_size, image_size, 3)) image = tf.image.per_image_standardization(image) images_labels.append([image, label]) num_threads = 16 images, labels = tf.train.batch_join(images_labels, batch_size=batch_size, capacity=4 * num_threads, enqueue_many=False, allow_smaller_final_batch=True) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init_op) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) emb_array = None batch_images, batch_labels = sess.run([images, labels]) emb = sess.run(embedding_layer, feed_dict={images_placeholder: batch_images, phase_train_placeholder: False}) emb_array = np.concatenate([emb_array, emb]) if emb_array is not None else emb coord.request_stop() coord.join(threads=threads) predictions = model.predict_proba(emb_array, ) best_class_indices = np.argmax(predictions, axis=1) best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] for ji in range(len(best_class_indices)): print( '%4d %s: %.3f' % ( ji, class_names[best_class_indices[ji]], best_class_probabilities[ji])) det_name.append(class_names[best_class_indices[ji]]) det_prob.append(best_class_probabilities[ji]) colors = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0], [0, 255, 255], [255, 0, 255]] for jk in range(len(det_name)): # print jk bbt = bbs[jk] if det_prob[jk]>=0.5: cv2.rectangle(image2, (bbt[0], bbt[1]), (bbt[0] + (bbt[2] - bbt[0]), bbt[1] + (bbt[3] - bbt[1])), colors[jk], 2) cv2.putText(image2, det_name[jk], (bbt[0] + (bbt[2] - bbt[0]) + 10, bbt[1] + (bbt[3] - bbt[1])), 0, 0.5, colors[jk]) cv2.imshow('fr', image2) fno = fno + 1 cap.release() cv2.destroyAllWindows()
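# The queue-based pipeline above (slice_input_producer, batch_join, queue runners) is only used to
# standardize and embed one crop at a time. Under the same assumptions as the rest of this section
# (a loaded graph exposing input:0, embeddings:0 and phase_train:0), the same embedding can be
# computed without the queue machinery, as the other snippets here do; a hedged sketch:
import numpy as np

def embed_crop(sess, crop, images_placeholder, embedding_layer, phase_train_placeholder):
    # crop: HxWx3 uint8 face crop already resized to the network input size;
    # the normalization mirrors tf.image.per_image_standardization
    standardized = (crop - np.mean(crop)) / np.maximum(np.std(crop), 1.0 / np.sqrt(crop.size))
    feed = {images_placeholder: standardized.reshape(1, *standardized.shape),
            phase_train_placeholder: False}
    return sess.run(embedding_layer, feed_dict=feed)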
# KNN Classifier
def knn_classifier(train_x, train_y):
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    return model


if __name__ == '__main__':
    # Build the face detection model and load its parameters
    print('Creating networks and loading parameters')
    gpu_memory_fraction = 1.0
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')

    # Build the facenet embedding model
    print('Building the facenet embedding model')
    tf.Graph().as_default()  # note: without a `with` block this call does not actually switch the default graph
    sess = tf.Session()
    images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, 3), name='input')
    phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
    embeddings = network.inference(images_placeholder, pool_type,
def Detect():  # detect the image in img_path
    ans = "Unknown"
    print(sys.path)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        HumanNames = os.listdir(train_img)
        HumanNames.sort()

        print('Loading feature extraction model')
        facenet.load_model(modeldir)
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        # video_capture = cv2.VideoCapture("akshay_mov.mp4")
        c = 0
        print('Start Recognition!')
        prevTime = 0
        # ret, frame = video_capture.read()
        frame = cv2.imread(img_path, 0)
        frame1 = cv2.imread(img_path)
        curTime = time.time() + 1  # calc fps
        timeF = frame_interval

        if (c % timeF == 0):
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                if nrof_faces > 1:
                    # more than one face: give up and return "Unknown"
                    cv2.imwrite('result.jpg', frame1)
                    return ans
                for i in range(nrof_faces):
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]
                    # inner exception: face touches the image border
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('face is too close')
                        cv2.imwrite('result.jpg', frame1)
                        return ans
                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[i] = facenet.flip(cropped[i], False)
                    scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear'))
                    scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[i] = facenet.prewhiten(scaled[i])
                    scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)
                    cv2.rectangle(frame1, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # box the face
                    # plot the predicted name under the box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    result_names = HumanNames[best_class_indices[0]]
                    print('Result name: ', result_names)
                    ans = result_names
                    cv2.putText(frame1, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (0, 255, 0), thickness=1, lineType=2)
            else:
                print('Unable to align')

        cv2.imwrite('result.jpg', frame1)
        return ans
import time
import pickle

input_video = "YOUR VIDEO PATH"
modeldir = './model/20170512-110547.pb'
classifier_filename = './classifier/classifier_2017.pkl'
npy = './npy'
train_img = "./train_img"
now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))

with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
        minsize = 53                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        HumanNames = os.listdir(train_img)
        HumanNames.sort()

        print('Loading Model')
        facenet.load_model(modeldir)
def main(args): sleep(random.random()) output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = facenet.get_dataset(args.input_dir) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename+'.png') print(image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:,:,0:3] bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: det = bounding_boxes[:,0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces>1: if args.detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering det_arr.append(det[index,:]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0]-args.margin/2, 0) bb[1] = np.maximum(det[1]-args.margin/2, 0) bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') nrof_successfully_aligned += 1 filename_base, file_extension = os.path.splitext(output_filename) if args.detect_multiple_faces: output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) else: output_filename_n = "{}{}".format(filename_base, file_extension) misc.imsave(output_filename_n, scaled) text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) else: 
print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
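# The grayscale fallback in the alignment loop above calls facenet.to_rgb. A minimal sketch of
# that conversion (assuming the stock facenet helper, which simply replicates the single channel):
import numpy as np

def to_rgb(img):
    # stack a 2-D grayscale image into an HxWx3 uint8 image
    h, w = img.shape
    ret = np.empty((h, w, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret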
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib  # note: removed in newer scikit-learn; use `import joblib` there

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './Path to det1.npy,..')
        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        HumanNames = ['Human_a', 'Human_b', 'Human_c', '...', 'Human_h']  # train human name

        print('Loading feature extraction model')
        modeldir = '/..Path to pre-trained model../20170512-110547/20170512-110547.pb'
        facenet.load_model(modeldir)
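# The classifier .pkl files loaded throughout these snippets (as a (model, class_names) tuple) are
# produced in a separate training step that is not shown here. A minimal sketch of how such a file
# is typically built from precomputed embeddings; emb_array, labels and out_path are assumptions
# for illustration, not part of the original code:
from sklearn.svm import SVC
import pickle

def train_classifier(emb_array, labels, class_names, out_path='classifier.pkl'):
    # emb_array: (N, embedding_size) facenet embeddings; labels: (N,) integer class ids
    model = SVC(kernel='linear', probability=True)
    model.fit(emb_array, labels)
    # pickle the same (model, class_names) tuple format that the recognition scripts load
    with open(out_path, 'wb') as f:
        pickle.dump((model, class_names), f)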
def main(): listpath = '/home/ubuntu/Desktop/PRfacenet/test/list_candidate' print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn( sess, '/home/ubuntu/Desktop/PRfacenet/src/align') minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor margin = 32 frame_interval = 3 batch_size = 1000 image_size = 160 input_image_size = 160 print('Loading feature extraction model') modeldir = '/home/ubuntu/Desktop/PRfacenet/casia_pre_trained_model/20170511-185253.pb' facenet.load_model(modeldir) images_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") phase_train_placeholder = tf.get_default_graph( ).get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] classifier_filename = '/home/ubuntu/Desktop/PRfacenet/PR-classifier/PRcasia_classifier.pkl' classifier_filename_exp = os.path.expanduser(classifier_filename) with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) print('load classifier file-> %s' % classifier_filename_exp) #capture video frame video_capture = cv2.VideoCapture('/media/ubuntu/MULTIBOOT/q.mkv') c = 0 # frame_width = 640 # frame_height = 480 # fourcc = cv2.VideoWriter_fourcc('P','I','M','1') # out = cv2.VideoWriter('/home/ubuntu/Desktop/output.avi',fourcc, 20.0, (frame_width,frame_height),True) print('Start Recognition!') prevTime = 0 while True: ret, frame = video_capture.read() if frame.shape[0] == 0: break else: num_rows, num_cols = frame.shape[:2] rotation_matrix = cv2.getRotationMatrix2D( (num_cols / 2, num_rows / 2), 0, 1) frame = cv2.warpAffine(frame, rotation_matrix, (num_cols, num_rows)) frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) #resize frame (optional) print(' %d %d ' % (frame.shape[0], frame.shape[1])) curTime = time.time() + 1 # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face( frame, minsize, pnet, rnet, onet, threshold, factor) print('', len(bounding_boxes)) print(frame.shape) if len(bounding_boxes) > 0: # if bounding_boxes nrof_faces = bounding_boxes.shape[0] print('Detected_Face: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] print(det) img_size = np.asarray(frame.shape)[0:2] cropped = [] scaled = [] scaled_reshape = [] bb = np.zeros((nrof_faces, 4), dtype=np.int32) for i in range(0, nrof_faces): emb_array = np.zeros((1, embedding_size)) bb[i][0] = det[i][0] #x topleft bb[i][1] = det[i][1] #y topleft bb[i][2] = det[i][2] #x bottom right bb[i][3] = det[i][3] #y bottom right print( "bb[i][0]:%d,bb[i][1]:%d,bb[i][2]:%d,bb[i][3]:%d" % (bb[i][0], bb[i][1], bb[i][2], bb[i][3])) # inner exception if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][ 2] >= len( frame[0]) or bb[i][3] >= len( frame): break if det[i][0] >= 0 and det[i][ 1] >= 0 and det[i][ 2] <= frame.shape[ 1] and bounding_boxes[i][ 3] <= frame.shape[0]: print('face is inner of range!') # continue cropped.append( frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) cropped[i] = facenet.flip( cropped[i], False) # print (cropped[i].shape) scaled.append( misc.imresize( cropped[i], (image_size, image_size), 
interp='bilinear')) scaled[i] = cv2.resize( scaled[i], (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC) scaled[i] = facenet.prewhiten( scaled[i]) scaled_reshape.append( scaled[i].reshape( -1, input_image_size, input_image_size, 3)) feed_dict = { images_placeholder: scaled_reshape[i], phase_train_placeholder: False } emb_array[0, :] = sess.run( embeddings, feed_dict=feed_dict) predictions = model.predict_proba( emb_array) best_class_indices = np.argmax( predictions, axis=1) print(len) best_class_probabilities = predictions[ np.arange(len(best_class_indices)), best_class_indices] cv2.rectangle( frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 1 ) #boxing face #top left ++++ bottom right #plot result idx under box text_x = bb[i][0] text_y = bb[i][1] print( 'result: %s ' % class_names[best_class_indices[0]]) cadiname = class_names[ best_class_indices[0]] print('accuracy:%.4f ' % best_class_probabilities[0]) accuracy = best_class_probabilities[0] if best_class_probabilities[0] >= 0.6: f_text = open( os.path.join( '%s/name_of_candidate.txt' % listpath), 'w') f_text.write(class_names[ best_class_indices[0]] + '\n') cv2.putText( frame, class_names[ best_class_indices[0]], (text_x, text_y - 10), cv2.FONT_HERSHEY_DUPLEX, 0.7, (0, 255, 255), thickness=1, lineType=1) cv2.putText( frame, '%3s' % accuracy, (text_x, text_y - 30), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 255, 255), thickness=1, lineType=1) if best_class_probabilities[0] <= 0.6: cv2.putText( frame, 'Unknown', (text_x, text_y - 10), cv2.FONT_HERSHEY_DUPLEX, 0.7, (0, 255, 255), thickness=1, lineType=1) else: print( 'Nobody is in the camera zone!!!!') sec = curTime - prevTime prevTime = curTime fps = 1 / (sec) str = 'FPS: %2.3f' % fps text_fps_x = len(frame[0]) - 320 text_fps_y = 20 cv2.putText(frame, str, (text_fps_x, text_fps_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2) cv2.imshow('Video', frame) if cv2.waitKey(33) == 27: break video_capture.release() cv2.destroyAllWindows()
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) image_dir = os.path.join(args.input_dir, 'facescrub') dataset = face_image.get_dataset('facescrub', image_dir) print('dataset size', len(dataset)) bbox = {} for label_file in ['facescrub_actors.txt', 'facescrub_actresses.txt']: label_file = os.path.join(args.input_dir, label_file) pp = 0 for line in open(label_file, 'r'): pp+=1 if pp==1: continue vec = line.split("\t") key = (vec[0], int(vec[2])) value = [int(x) for x in vec[4].split(',')] bbox[key] = value print('bbox size', len(bbox)) valid_key = {} json_data = open(os.path.join(args.input_dir, 'facescrub_uncropped_features_list.json')).read() json_data = json.loads(json_data)['path'] for _data in json_data: key = _data.split('/')[-1] pos = key.rfind('.') if pos<0: print(_data) else: key = key[0:pos] keys = key.split('_') #print(key) if len(keys)!=2: print('err', key, _data) continue #assert len(keys)==2 key = (keys[0], int(keys[1])) valid_key[key] = 1 #print(key) print('valid keys', len(valid_key)) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor image_size = [112,96] image_size = [112,112] src = np.array([ [30.2946, 51.6963], [65.5318, 51.5014], [48.0252, 71.7366], [33.5493, 92.3655], [62.7299, 92.2041] ], dtype=np.float32 ) if image_size[1]==112: src[:,0] += 8.0 # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) #output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) output_filename = os.path.join(args.output_dir, 'lst') with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof = np.zeros( (5,), dtype=np.int32) for fimage in dataset: if nrof_images_total%100==0: print("Processing %d, (%s)" % (nrof_images_total, nrof)) nrof_images_total += 1 #if nrof_images_total<950000: # continue image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)'%image_path) continue #print(image_path) filename = os.path.splitext(os.path.split(image_path)[1])[0] _paths = fimage.image_path.split('/') print(fimage.image_path) a,b = _paths[-2], _paths[-1] pb = b.rfind('.') bname = b[0:pb] pb = bname.rfind('_') body = bname[(pb+1):] img_id = int(body) key = (a, img_id) if not key in valid_key: continue #print(b, img_id) assert key in bbox fimage.bbox = bbox[key] try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:,:,0:3] tb = bname.replace(' ','_')+".png" ta = a.replace(' ','_') target_dir = 
os.path.join(args.output_dir, ta) if not os.path.exists(target_dir): os.makedirs(target_dir) target_file = os.path.join(target_dir, tb) warped = None if fimage.landmark is not None: dst = fimage.landmark.astype(np.float32) tform = trans.SimilarityTransform() tform.estimate(dst, src[0:3,:]*1.5+image_size[0]*0.25) M = tform.params[0:2,:] warped0 = cv2.warpAffine(img,M,(image_size[1]*2,image_size[0]*2), borderValue = 0.0) _minsize = image_size[0] bounding_boxes, points = detect_face.detect_face(warped0, _minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0]>0: bindex = 0 det = bounding_boxes[bindex,0:4] #points need to be transpose, points = points.reshape( (5,2) ).transpose() dst = points[:, bindex].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(warped0,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[0]+=1 #assert fimage.bbox is not None if warped is None and fimage.bbox is not None: _minsize = img.shape[0]//4 bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0]>0: det = bounding_boxes[:,0:4] bindex = -1 index2 = [0.0, 0] for i in xrange(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou>index2[0]: index2[0] = iou index2[1] = i if index2[0]>0.3: bindex = index2[1] if bindex>=0: dst = points[:, bindex].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[1]+=1 #print('1',target_file,index2[0]) if warped is None and fimage.bbox is not None: bb = fimage.bbox #croped = img[bb[1]:bb[3],bb[0]:bb[2],:] bounding_boxes, points = detect_face.detect_face_force(img, bb, pnet, rnet, onet) assert bounding_boxes.shape[0]==1 _box = bounding_boxes[0] if _box[4]>=0.3: dst = points[:, 0].reshape( (2,5) ).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2,:] warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0) nrof[2]+=1 #print('2',target_file) if warped is None: roi = np.zeros( (4,), dtype=np.int32) roi[0] = int(img.shape[1]*0.06) roi[1] = int(img.shape[0]*0.06) roi[2] = img.shape[1]-roi[0] roi[3] = img.shape[0]-roi[1] if fimage.bbox is not None: bb = fimage.bbox h = bb[3]-bb[1] w = bb[2]-bb[0] x = bb[0] y = bb[1] #roi = np.copy(bb) _w = int( (float(h)/image_size[0])*image_size[1] ) x += (w-_w)//2 #x = min( max(0,x), img.shape[1] ) x = max(0,x) xw = x+_w xw = min(xw, img.shape[1]) roi = np.array( (x, y, xw, y+h), dtype=np.int32) nrof[3]+=1 else: nrof[4]+=1 #print('3',bb,roi,img.shape) #print('3',target_file) warped = img[roi[1]:roi[3],roi[0]:roi[2],:] #print(warped.shape) warped = cv2.resize(warped, (image_size[1], image_size[0])) bgr = warped[...,::-1] cv2.imwrite(target_file, bgr) oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname)) text_file.write(oline)
def main(args): sleep(random.random()) output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) dataset = facenet.get_dataset(args.input_dir) print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 20 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor # Add a random key to the filename to allow alignment using multiple processes random_key = np.random.randint(0, high=99999) bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) with open(bounding_boxes_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 if args.random_order: random.shuffle(dataset) for cls in dataset: output_class_dir = os.path.join(output_dir, cls.name) if not os.path.exists(output_class_dir): os.makedirs(output_class_dir) if args.random_order: random.shuffle(cls.image_paths) for image_path in cls.image_paths: nrof_images_total += 1 filename = os.path.splitext(os.path.split(image_path)[1])[0] output_filename = os.path.join(output_class_dir, filename+'.jpg') print(image_path) if not os.path.exists(output_filename): try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim<2: print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = facenet.to_rgb(img) img = img[:,:,0:3] bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: det = bounding_boxes[:,0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces>1: if args.detect_multiple_faces: for i in range(nrof_faces): det_arr.append(np.squeeze(det[i])) else: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering det_arr.append(det[index,:]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0]-args.margin/2, 0) bb[1] = np.maximum(det[1]-args.margin/2, 0) bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') nrof_successfully_aligned += 1 filename_base, file_extension = os.path.splitext(output_filename) if args.detect_multiple_faces: output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) else: output_filename_n = "{}{}".format(filename_base, file_extension) misc.imsave(output_filename_n, scaled) text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) else: 
print('Unable to align "%s"' % image_path) text_file.write('%s\n' % (output_filename)) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) # Store some git revision info in a text file in the log directory #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) image_dir = os.path.join(args.input_dir, 'facescrub') dataset = face_image.get_dataset('facescrub', image_dir) print('dataset size', len(dataset)) bbox = {} for label_file in ['facescrub_actors.txt', 'facescrub_actresses.txt']: label_file = os.path.join(args.input_dir, label_file) pp = 0 for line in open(label_file, 'r'): pp += 1 if pp == 1: continue vec = line.split("\t") key = (vec[0], int(vec[2])) value = [int(x) for x in vec[4].split(',')] bbox[key] = value print('bbox size', len(bbox)) valid_key = {} json_data = open( os.path.join(args.input_dir, 'facescrub_uncropped_features_list.json')).read() json_data = json.loads(json_data)['path'] for _data in json_data: key = _data.split('/')[-1] pos = key.rfind('.') if pos < 0: print(_data) else: key = key[0:pos] keys = key.split('_') #print(key) if len(keys) != 2: print('err', key, _data) continue #assert len(keys)==2 key = (keys[0], int(keys[1])) valid_key[key] = 1 #print(key) print('valid keys', len(valid_key)) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor image_size = [112, 96] image_size = [112, 112] src = np.array([[30.2946, 51.6963], [65.5318, 51.5014], [48.0252, 71.7366], [33.5493, 92.3655], [62.7299, 92.2041]], dtype=np.float32) if image_size[1] == 112: src[:, 0] += 8.0 # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) #output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) output_filename = os.path.join(args.output_dir, 'lst') with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof = np.zeros((5, ), dtype=np.int32) for fimage in dataset: if nrof_images_total % 100 == 0: print("Processing %d, (%s)" % (nrof_images_total, nrof)) nrof_images_total += 1 #if nrof_images_total<950000: # continue image_path = fimage.image_path if not os.path.exists(image_path): print('image not found (%s)' % image_path) continue #print(image_path) filename = os.path.splitext(os.path.split(image_path)[1])[0] _paths = fimage.image_path.split('/') # print(fimage.image_path) a, b = _paths[-2], _paths[-1] pb = b.rfind('.') bname = b[0:pb] pb = bname.rfind('_') body = bname[(pb + 1):] img_id = int(body) # try: # img_id = int(body) # except ValueError as e: # bname = b # pb = bname.rfind('_') # body = bname[(pb+1):] # img_id = int(body) # print('image_path: %s' % image_path) # print('a: %s' % a) # print('b: %s' % b) # print('pb: %s' % pb) # print('bname: %s' % bname) # print('body: %s' % body) # exit(-1) key = (a, img_id) if not key in valid_key: continue #print(b, img_id) assert key in bbox fimage.bbox = bbox[key] try: img = misc.imread(image_path) except (IOError, ValueError, IndexError) as e: 
errorMessage = '{}: {}'.format(image_path, e) print(errorMessage) else: if img.ndim < 2: print('Unable to align "%s", img dim error' % image_path) #text_file.write('%s\n' % (output_filename)) continue if img.ndim == 2: img = to_rgb(img) img = img[:, :, 0:3] tb = bname.replace(' ', '_') + ".png" ta = a.replace(' ', '_') target_dir = os.path.join(args.output_dir, ta) if not os.path.exists(target_dir): os.makedirs(target_dir) target_file = os.path.join(target_dir, tb) warped = None if fimage.landmark is not None: dst = fimage.landmark.astype(np.float32) tform = trans.SimilarityTransform() tform.estimate(dst, src[0:3, :] * 1.5 + image_size[0] * 0.25) M = tform.params[0:2, :] warped0 = cv2.warpAffine( img, M, (image_size[1] * 2, image_size[0] * 2), borderValue=0.0) _minsize = image_size[0] bounding_boxes, points = detect_face.detect_face( warped0, _minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0] > 0: bindex = 0 det = bounding_boxes[bindex, 0:4] #points need to be transpose, points = points.reshape( (5,2) ).transpose() dst = points[:, bindex].reshape((2, 5)).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2, :] warped = cv2.warpAffine(warped0, M, (image_size[1], image_size[0]), borderValue=0.0) nrof[0] += 1 #assert fimage.bbox is not None if warped is None and fimage.bbox is not None: _minsize = img.shape[0] // 4 bounding_boxes, points = detect_face.detect_face( img, _minsize, pnet, rnet, onet, threshold, factor) if bounding_boxes.shape[0] > 0: det = bounding_boxes[:, 0:4] bindex = -1 index2 = [0.0, 0] for i in xrange(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou > index2[0]: index2[0] = iou index2[1] = i if index2[0] > 0.3: bindex = index2[1] if bindex >= 0: dst = points[:, bindex].reshape((2, 5)).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2, :] warped = cv2.warpAffine( img, M, (image_size[1], image_size[0]), borderValue=0.0) nrof[1] += 1 #print('1',target_file,index2[0]) if warped is None and fimage.bbox is not None: bb = fimage.bbox #croped = img[bb[1]:bb[3],bb[0]:bb[2],:] bounding_boxes, points = detect_face.detect_face_force( img, bb, pnet, rnet, onet) assert bounding_boxes.shape[0] == 1 _box = bounding_boxes[0] if _box[4] >= 0.3: dst = points[:, 0].reshape((2, 5)).T tform = trans.SimilarityTransform() tform.estimate(dst, src) M = tform.params[0:2, :] warped = cv2.warpAffine(img, M, (image_size[1], image_size[0]), borderValue=0.0) nrof[2] += 1 #print('2',target_file) if warped is None: roi = np.zeros((4, ), dtype=np.int32) roi[0] = int(img.shape[1] * 0.06) roi[1] = int(img.shape[0] * 0.06) roi[2] = img.shape[1] - roi[0] roi[3] = img.shape[0] - roi[1] if fimage.bbox is not None: bb = fimage.bbox h = bb[3] - bb[1] w = bb[2] - bb[0] x = bb[0] y = bb[1] #roi = np.copy(bb) _w = int((float(h) / image_size[0]) * image_size[1]) x += (w - _w) // 2 #x = min( max(0,x), img.shape[1] ) x = max(0, x) xw = x + _w xw = min(xw, img.shape[1]) roi = np.array((x, y, xw, y + h), dtype=np.int32) nrof[3] += 1 else: nrof[4] += 1 #print('3',bb,roi,img.shape) #print('3',target_file) warped = img[roi[1]:roi[3], roi[0]:roi[2], :] #print(warped.shape) warped = cv2.resize(warped, (image_size[1], image_size[0])) bgr = warped[..., ::-1] cv2.imwrite(target_file, bgr) oline = '%d\t%s\t%d\n' % (1, target_file, int( fimage.classname)) text_file.write(oline)
def main(args): output_dir = os.path.expanduser(args.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) datamap = {} pp = 0 datasize = 0 verr = 0 for line in open(args.input_dir+"_clean_list.txt", 'r'): pp+=1 if pp%10000==0: print('loading list', pp) line = line.strip()[2:] if not line.startswith('m.'): continue vec = line.split('/') assert len(vec)==2 #print(line) person = vec[0] img = vec[1] try: img_id = int(img.split('.')[0]) except ValueError: #print('value error', line) verr+=1 continue if not person in datamap: labelid = len(datamap) datamap[person] = [labelid, {img_id : 1}] else: datamap[person][1][img_id] = 1 datasize+=1 print('dataset size', args.name, datasize) print('dataset err', verr) print('Creating networks and loading parameters') with tf.Graph().as_default(): #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess = tf.Session() with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, None) minsize = 100 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor print(minsize) print(threshold) print(factor) # Add a random key to the filename to allow alignment using multiple processes #random_key = np.random.randint(0, high=99999) #bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name) with open(output_filename, "w") as text_file: nrof_images_total = 0 nrof_successfully_aligned = 0 nrof_changed = 0 nrof_iou3 = 0 nrof_force = 0 for line in open(args.input_dir, 'r'): vec = line.strip().split() person = vec[0] img_id = int(vec[1]) v = datamap.get(person, None) if v is None: continue if not img_id in v[1]: continue labelid = v[0] img_str = base64.b64decode(vec[-1]) nparr = np.fromstring(img_str, np.uint8) img = cv2.imdecode(nparr, cv2.CV_LOAD_IMAGE_COLOR) img = img[...,::-1] #to rgb if nrof_images_total%100==0: print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned)) nrof_images_total += 1 target_dir = os.path.join(output_dir, person) if not os.path.exists(target_dir): os.makedirs(target_dir) target_path = os.path.join(target_dir, "%d.jpg"%img_id) _minsize = minsize fimage = edict() fimage.bbox = None fimage.image_path = target_path fimage.classname = str(labelid) if fimage.bbox is not None: _bb = fimage.bbox _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] ) bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor) bindex = -1 nrof_faces = bounding_boxes.shape[0] if fimage.bbox is None and nrof_faces>0: det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets,2.0),0) bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering if fimage.bbox is not None: if nrof_faces>0: assert(bounding_boxes.shape[0]==points.shape[1]) det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] index2 = [0.0, 0] for i in xrange(det.shape[0]): _det = det[i] iou = IOU(fimage.bbox, _det) if iou>index2[0]: index2[0] = iou index2[1] = i if index2[0]>-0.3: bindex = 
index2[1] nrof_iou3+=1 if bindex<0: bounding_boxes, points = detect_face.detect_face_force(img, fimage.bbox, pnet, rnet, onet) bindex = 0 nrof_force+=1 if bindex>=0: det = bounding_boxes[:,0:4] det = det[bindex,:] points = points[:, bindex] landmark = points.reshape((2,5)).T #points need to be transpose, points = points.reshape( (5,2) ).transpose() det = np.squeeze(det) bb = det points = list(points.flatten()) assert(len(points)==10) warped = face_preprocess.preprocess(img, bbox=bb, landmark = landmark, image_size=args.image_size) misc.imsave(target_path, warped) nrof_successfully_aligned += 1 oline = '%d\t%s\t%d' % (1,fimage.image_path, int(fimage.classname)) #oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3]) #oline += '\t'.join([str(x) for x in points]) text_file.write("%s\n"%oline) print('Total number of images: %d' % nrof_images_total) print('Number of successfully aligned images: %d' % nrof_successfully_aligned) print('Number of changed: %d' % nrof_changed) print('Number of iou3: %d' % nrof_iou3) print('Number of force: %d' % nrof_force)
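# Note on the .lst records written by the alignment scripts above (as read from their
# text_file.write calls): each line is tab-separated, either
#   1 <tab> aligned_image_path <tab> class_id
# when a warped/aligned crop was saved to disk, or
#   0 <tab> original_image_path <tab> class_id <tab> x1 <tab> y1 <tab> x2 <tab> y2 <tab> <10 landmark coords>
# when only the detected bounding box and the five landmark points are recorded.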