def __init__(self, model_path):
    with tf.Graph().as_default():
        # MTCNN model
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        self.sess_mtcnn = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with self.sess_mtcnn.as_default():
            # nets = [pnet, rnet, onet]
            self.nets = detect_face.create_mtcnn(self.sess_mtcnn, None)

        # FaceNet model
        self.sess_facenet = tf.Session()
        with self.sess_facenet.as_default():
            facenet.load_model(model_path)
            self.image_tensor = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            self.embedding_tensor = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            # True for training and False for testing
            self.phase_tensor = tf.get_default_graph().get_tensor_by_name(
                "phase_train:0")
        print('load model success')

    # define detection parameters
    self.minsize = 20                 # minimum size of face
    self.threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    self.factor = 0.709               # scale factor
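# Hedged usage sketch (not from the original source): assuming the class above
# is exposed as `FaceModel` and that `facenet.prewhiten` is available, this is
# how the loaded FaceNet graph could be queried for embeddings. The checkpoint
# and image paths are illustrative.
#
# model = FaceModel('../models/facenet/20170512-110547.pb')
# rgb = cv2.cvtColor(cv2.imread('face.jpg'), cv2.COLOR_BGR2RGB)
# crop = cv2.resize(rgb, (160, 160))            # FaceNet expects 160x160 inputs
# batch = facenet.prewhiten(crop)[np.newaxis]   # normalize, add batch dimension
# emb = model.sess_facenet.run(
#     model.embedding_tensor,
#     feed_dict={model.image_tensor: batch, model.phase_tensor: False})
# print(emb.shape)                              # one embedding vector per face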
def init(self, args=None):
    if self.model is None:
        if args is None:
            self.model = FaceCascade({
                'mode': 'INFERENCE',
                'model_dir': '../models/cascade',
                'ckpt_prefix': './server/models/12-net/model.ckpt',
                'cascade': 12,
            })
        else:
            self.model = FaceCascade({
                'mode': 'INFERENCE',
                'model_dir': '../models/cascade',
                'ckpt_prefix': args.model,
                'cascade': args.cascade,
            })
        self.threshold = 0.99
        self.count_val = 0
        self.count_correct = 0
        self.count_positive = 0
        self.count_true_positive = 0
    if self.sess_mtcnn is None:
        self.sess_mtcnn = tf.Session()
        self.pnet, self.rnet, self.onet = mtcnn_detect.create_mtcnn(
            self.sess_mtcnn, None)
    """if self.fxpress is None:
        self.fxpress = EmotionRecognition()
        self.fxpress.build_network()"""
    if self.emoc is None:
        self.emoc = EmotionClassifier()
        self.emoc.build_network(args)
def __init__(self, model_path, min_size=20, tf_config=None):
    self.minsize = min_size           # minimum size of face
    self.threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    self.factor = 0.709               # scale factor
    print('Load MTCNN model from', model_path)
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.sess = tf.Session(config=tf_config)
        with self.sess.as_default():
            self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(
                self.sess, model_path=model_path)
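# Hedged usage sketch (illustrative, not part of the original file): with the
# three cascaded nets created above, detect_face.detect_face returns an Nx5
# array of boxes (x1, y1, x2, y2, score) plus a 10xN array of landmark
# coordinates. The wrapper class name and image path are assumptions.
#
# detector = FaceDetector(model_path='./models/mtcnn')  # hypothetical name
# img = cv2.cvtColor(cv2.imread('group.jpg'), cv2.COLOR_BGR2RGB)
# with detector.graph.as_default():
#     boxes, points = detect_face.detect_face(
#         img, detector.minsize, detector.pnet, detector.rnet, detector.onet,
#         detector.threshold, detector.factor)
# for x1, y1, x2, y2, score in boxes:
#     print('face at (%d, %d)-(%d, %d), score %.3f' % (x1, y1, x2, y2, score))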
def __init__(self):
    # stagger startup when several worker processes launch at once
    sleep(random.random())
    # self.random_order = True
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # grow memory usage as required
    config.gpu_options.per_process_gpu_memory_fraction = 0.8  # cap GPU memory usage at 80%
    self.sess = tf.Session(config=config)
    # with sess.as_default():
    self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(
        self.sess, None)
def profiling(args):
    mtcnn = {}
    mtcnn['session'] = tf.Session()
    #self.__pnet, self.__rnet, self.__onet = FaceDetector.create_mtcnn(self.__mtcnn, None)
    mtcnn['pnet'], mtcnn['rnet'], mtcnn['onet'] = FaceDetector.create_mtcnn(
        mtcnn['session'], None)

    emoc = EmotionClassifier()
    emoc.build_network(None)

    #facenet_res = facenet.load_model('../models/facenet/20170512-110547.pb')  # InceptionResnet V1
    facenet_sqz = facenet.load_model(
        '../models/facenet/20180204-160909')  # SqueezeNet
def main(args):
    st = time.time()

    # check if input directory exists
    if not os.path.exists(args.input_directory):
        print("Error! No input directory", args.input_directory)
        return -1

    # read images
    images_l, images_paths = load_images(args.input_directory)

    # init. tensorflow session
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './mtcnn')

            # localize and blur faces, iterating over images
            for image, image_path in zip(images_l, images_paths):
                print("Processing", image_path + "...")
                bbs, lds = mtcnn_localize_faces(image, pnet, rnet, onet,
                                                minsize=20,
                                                threshold=[0.7, 0.8, 0.85],
                                                factor=0.75)

                # skip this iteration if there's no face
                if len(bbs) == 0:
                    print("Couldn't find faces!")
                    continue

                # get faces
                for bb, ld in zip(bbs, lds):
                    # get bounding box: top, right, bottom, left
                    top = bb[0]
                    right = bb[1]
                    bottom = bb[2]
                    left = bb[3]

                    # build landmarks' x, y pairs
                    points = []
                    for x, y in zip(ld[:5], ld[5:]):
                        points.append(x)
                        points.append(y)

                    # get face thumbnail
                    face_image = image[top:bottom, left:right]

                    # blur face thumbnail, or black it out if blur is disabled
                    if args.blur > 0:
                        face_image = cv2.GaussianBlur(face_image, (105, 105), args.blur)
                    else:
                        face_image = np.zeros(face_image.shape)

                    # write blurred face back into the image
                    image[top:bottom, left:right] = face_image

                    # PIL image
                    # pil_image = Image.fromarray(image)
                    # pil_image_face = Image.fromarray(face_image)

                    # eyes' landmarks: first two pairs
                    # get larger rectangle
                    # points[0] = points[0] * 0.9
                    # points[1] = points[1] * 0.9
                    # points[2] = points[2] * 1.1
                    # points[3] = points[3] * 1.1
                    # draw = ImageDraw.Draw(pil_image)
                    # cover eyes with rectangle
                    # draw.rectangle(points[:4], fill="black")

                # create output directory if it doesn't exist
                if not os.path.exists(args.output_directory):
                    os.makedirs(args.output_directory)

                # save image
                pil_image = Image.fromarray(image)
                pil_image.save(os.path.join(args.output_directory, image_path))

    print("Total running time:", time.time() - st, "sec.")
    return 0
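# mtcnn_localize_faces is not defined in this file; a minimal sketch of what
# it likely does, assuming the standard detect_face API (boxes come back as
# x1, y1, x2, y2, score; points as 10 values per face, five x's then five y's,
# matching the zip(ld[:5], ld[5:]) usage above):
#
# def mtcnn_localize_faces(image, pnet, rnet, onet,
#                          minsize=20, threshold=[0.7, 0.8, 0.85], factor=0.75):
#     bounding_boxes, points = detect_face.detect_face(
#         image, minsize, pnet, rnet, onet, threshold, factor)
#     bbs, lds = [], []
#     for i, box in enumerate(bounding_boxes):
#         x1, y1, x2, y2 = box[:4].astype(int)
#         bbs.append((y1, x2, y2, x1))          # (top, right, bottom, left)
#         lds.append(points[:, i].astype(int))  # five x's followed by five y's
#     return bbs, lds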
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        configs_ = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        configs_.gpu_options.allow_growth = True
        sess = tf.Session(config=configs_)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds for the three stages
    factor = 0.709               # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = facenet.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                                    # some extra weight on the centering
                                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' %
                                                (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
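# The args consumed above imply a CLI along these lines; a hedged sketch of
# the argument parser (defaults are illustrative, not taken from the source):
#
# def parse_arguments(argv):
#     parser = argparse.ArgumentParser()
#     parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
#     parser.add_argument('output_dir', type=str, help='Directory for aligned face thumbnails.')
#     parser.add_argument('--image_size', type=int, default=160, help='Output image size in pixels.')
#     parser.add_argument('--margin', type=int, default=32, help='Margin around the bounding box in pixels.')
#     parser.add_argument('--random_order', action='store_true', help='Shuffle image order; useful with multiple processes.')
#     parser.add_argument('--gpu_memory_fraction', type=float, default=0.25, help='Upper bound on GPU memory used by the process.')
#     parser.add_argument('--detect_multiple_faces', type=bool, default=False, help='Align every detected face, not just the central one.')
#     return parser.parse_args(argv)
#
# if __name__ == '__main__':
#     main(parse_arguments(sys.argv[1:]))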
def main():
    if ARGS.test == 'train':
        train(ARGS)
    elif ARGS.test == 'gen':
        gen(ARGS)
    elif ARGS.test == 'predict':
        predict(ARGS)
    elif ARGS.test == 'server':
        server_start(ARGS)
    elif ARGS.test == 'server_production':
        server_start(ARGS, ARGS.port)
    elif ARGS.test == 'hnm':
        hnm(ARGS)
    elif ARGS.test == 'val':
        val(ARGS)
    elif ARGS.test == 'fer':
        fer(ARGS)
    elif ARGS.test == 'profiling':
        profiling(ARGS)
    elif ARGS.test == 'facenet':
        print(facenet)
        mtcnn = tf.Session()
        pnet, rnet, onet = FaceDetector.create_mtcnn(mtcnn, None)

        # Load the model
        t_ = time.time()
        print('Loading model...')
        #fnet = facenet.load_model('../models/facenet/20180204-160909')  # SqueezeNet
        fnet = facenet.load_model(
            '../models/facenet/20170512-110547.pb')  # InceptionResnet V1
        t_ = time.time() - t_
        print('done', t_ * 1000)

        stats = {
            'same': {
                'd_avg': 0.,
                'd_max': -9999.,
                'd_min': 9999.,
                'sqr_avg': 0.,
                'count': 0,
            },
            'diff': {
                'd_avg': 0.,
                'd_max': -9999.,
                'd_min': 9999.,
                'sqr_avg': 0.,
                'count': 0,
            },
            'precision': {},
            'timing': {
                'count': 0,
                'forward': 0.
            },
        }

        if True:
            # Get input and output tensors
            emb = None
            names = []
            for iteration in range(16):
                # Load faces from the LFW dataset and parse names from paths to group faces
                images = []
                batch_size = 128
                fid = 0
                for i in range(batch_size):
                    f = DirectoryWalker().get_a_file(
                        directory='../data/face/lfw', filters=['.jpg'])
                    if f is None or not f.path:
                        break
                    n = os.path.split(os.path.split(f.path)[0])[1]
                    #print('name', n)
                    n = abs(hash(n)) % (10**8)
                    img = cv2.imread(f.path, 1)
                    extents, landmarks = FaceDetector.detect_face(
                        img / 255., 120, pnet, rnet, onet,
                        threshold=[0.6, 0.7, 0.9], factor=0.6,
                        interpolation=cv2.INTER_LINEAR)
                    for j, e in enumerate(extents):
                        x1, y1, x2, y2, confidence = e.astype(dtype=np.int)
                        #print(len(landmarks[j]))
                        #cropped = img[int(x1):int(x2), int(y1):int(y2), :]
                        aligned = FaceApplications.align_face(
                            img, landmarks[j], intensity=1., sz=160,
                            ortho=True, expand=1.5)
                        #cv2.imwrite('../data/face/mtcnn_cropped/'+str(fid).zfill(4)+'.jpg', aligned)
                        images.append(aligned / 255.)
                        names.append(n)
                        """debug = aligned.astype(dtype=np.int)
                        print('debug', debug)
                        for p in debug:
                            cv2.circle(img, (p[0], p[1]), 2, (255, 0, 255))
                        for p in landmarks[j]:
                            cv2.circle(img, (p[0], p[1]), 2, (255, 255, 0))"""
                        fid += 1
                    #cv2.imwrite('../data/face/mtcnn_cropped/'+str(i).zfill(4)+'-annotated.jpg', img)

                # Run forward pass to calculate embeddings
                if len(images):
                    t_ = time.time()
                    if emb is None:
                        emb = fnet(images)
                    else:
                        emb = np.concatenate((emb, fnet(images)))
                    #emb = emb + sess.run(embeddings, feed_dict=feed_dict)
                    t_ = time.time() - t_
                    stats['timing']['count'] += len(images)
                    stats['timing']['forward'] += t_ * 1000
                    print('forward', emb.shape, t_ * 1000)

            print()
            print()
            print('avg. forward time:',
                  stats['timing']['forward'] / stats['timing']['count'])

            # Test distance
            samples = sklearn.preprocessing.normalize(emb)
            for i1, s1 in enumerate(samples):
                for i2, s2 in enumerate(samples):
                    if i1 != i2:
                        d_ = scipy.spatial.distance.cosine(s1, s2)
                        if names[i1] == names[i2]:
                            # Same person as annotated by LFW
                            cate = 'same'
                        else:
                            # Different person
                            cate = 'diff'
                        c_ = stats[cate]['count']
                        stats[cate]['d_avg'] = stats[cate]['d_avg'] * c_ / (c_ + 1) + d_ / (c_ + 1)
                        d_sqr = d_ * d_
                        stats[cate]['sqr_avg'] = stats[cate]['sqr_avg'] * c_ / (c_ + 1) + d_sqr / (c_ + 1)
                        if d_ > stats[cate]['d_max']:
                            stats[cate]['d_max'] = d_
                        elif d_ < stats[cate]['d_min']:
                            stats[cate]['d_min'] = d_
                        stats[cate]['count'] += 1

                        # Gather precision statistics at different thresholds
                        increments = 64
                        for t_ in range(increments):
                            threshold = 0.2 + t_ * (0.6 / increments)
                            if threshold not in stats['precision']:
                                stats['precision'][threshold] = {
                                    'correct': 0,
                                    'total': 0,
                                    'precision': 0.,
                                    'true_pos': 0,
                                    'total_pos': 0,
                                    'recall': 0.,
                                }
                            if (cate == 'same' and d_ <= threshold) or (cate == 'diff' and d_ > threshold):
                                stats['precision'][threshold]['correct'] += 1
                            if cate == 'same':
                                if d_ <= threshold:
                                    stats['precision'][threshold]['true_pos'] += 1
                                stats['precision'][threshold]['total_pos'] += 1
                                stats['precision'][threshold]['recall'] = (
                                    stats['precision'][threshold]['true_pos'] /
                                    stats['precision'][threshold]['total_pos'])
                            stats['precision'][threshold]['total'] += 1
                            stats['precision'][threshold]['precision'] = (
                                stats['precision'][threshold]['correct'] /
                                stats['precision'][threshold]['total'])

            """tree = scipy.spatial.KDTree(samples)
            for i, s in enumerate(samples):
                print(i, tree.query(s))"""

            for cate in ['same', 'diff']:
                stats[cate]['stddev'] = stats[cate]['sqr_avg'] - stats[cate]['d_avg'] * stats[cate]['d_avg']

            print()
            pp = pprint.PrettyPrinter(indent=4)
            pp.pprint(stats)

            # Print precision vs recall
            print()
            print('threshold,recall,precision')
            for t in stats['precision']:
                t_stat = stats['precision'][t]
                print(str(t) + ',' + str(t_stat['recall']) + ',' + str(t_stat['precision']))
    elif ARGS.test == 'align':
        face_app = FaceApplications()
        face_app.align_dataset()
    elif ARGS.test == 'fxpress':
        fxpress(ARGS)
    elif ARGS.test == 'fxpress_train':
        fxpress_train(ARGS)
    elif ARGS.test == 'emoc':
        classifier = EmotionClassifier()
        classifier.build_network(ARGS)
        classifier.val(ARGS)
    elif ARGS.test == 'face_app':
        face_app = FaceApplications()
        face_app.detect()
    elif ARGS.test == 'face_benchmark':
        # Benchmark different parameters, resolutions, and interpolation methods
        # for MTCNN face detection: time vs. precision
        interpolations = ['NEAREST', 'LINEAR', 'AREA']
        resolutions = [256, 320, 384, 448, 512, 640, 1024, 1280]
        factors = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
        expectations = [
            ['0001.jpg', 4], ['0002.jpg', 1], ['0003.jpg', 1], ['0004.jpg', 0],
            ['0005.jpg', 1], ['0006.jpg', 1], ['0007.jpg', 59], ['0008.jpg', 6],
            ['0009.jpg', 1], ['0010.jpg', 5], ['0011.jpg', 4], ['0012.jpg', 17],
            ['0013.jpg', 20], ['0014.jpg', 48], ['0015.jpg', 22],
        ]
        log_file = open('../data/face_benchmark.csv', 'w')
        log_file.write('time,precision_index,cp,options\n')
        for interp in interpolations:
            for res_cap in resolutions:
                for factor in factors:
                    options = {
                        'res_cap': res_cap,
                        'factor': factor,
                        'interp': interp,
                    }
                    if interp == 'NEAREST':
                        interpolation = cv2.INTER_NEAREST
                    elif interp == 'LINEAR':
                        interpolation = cv2.INTER_LINEAR
                    elif interp == 'AREA':
                        interpolation = cv2.INTER_AREA

                    iterations = 20
                    file_count = len(expectations)
                    time_sampling = np.zeros((file_count, iterations,), dtype=np.float)
                    pi_sampling = np.zeros((file_count, iterations,), dtype=np.float)

                    image_dir = '../data/face_benchmark/'
                    for k, item in enumerate(expectations):
                        filename, expected_faces = item
                        inpath = image_dir + filename
                        img = cv2.imread(inpath, 1)
                        if img is None:
                            break
                        img, scaling = ImageUtilities.fit_resize(
                            img, maxsize=(res_cap, res_cap),
                            interpolation=interpolation)
                        retval, bindata = cv2.imencode('.jpg', img)
                        bindata_b64 = base64.b64encode(bindata).decode()
                        requests = {
                            'requests': [{
                                'requestId': str(uuid.uuid1()),
                                'media': {'content': bindata_b64},
                                'services': [{
                                    'type': 'face_',
                                    'model': 'a-emoc',
                                    'options': options
                                }]
                            }],
                            'timing': {'client_sent': time.time()}
                        }
                        for i in range(iterations):
                            #url = 'http://10.129.11.4/cgi/predict'
                            requests['timing']['client_sent'] = time.time()
                            url = 'http://192.168.41.41:8080/predict'
                            postdata = json.dumps(requests)
                            request = Request(url, data=postdata.encode())
                            response = json.loads(urlopen(request).read().decode())
                            timing = response['timing']
                            server_time = timing['server_sent'] - timing['server_rcv']
                            #print('server time:', server_time)
                            total_time = (time.time() - timing['client_sent']) * 1000
                            client_time = total_time - server_time
                            print('response time:', total_time)
                            pi = 0.
                            for r_ in response['requests']:
                                for s_ in r_['services']:
                                    rects_ = s_['results']['rectangles']
                                    if expected_faces:
                                        pi = len(rects_) / expected_faces
                                    elif len(rects_):
                                        pi = expected_faces / len(rects_)
                                    else:
                                        pi = 1.0
                                    #print('faces detected:', len(rects_), pi)
                            time_sampling[k][i] = total_time
                            pi_sampling[k][i] = pi
                            #time.sleep(0.5)

                    time_mean = np.mean(time_sampling)
                    pi_mean = np.mean(pi_sampling) * 100
                    cp = pi_mean * pi_mean / time_mean
                    print(time_mean, pi_mean)
                    print()
                    log_file.write(','.join([
                        str(time_mean),
                        str(pi_mean),
                        str(cp),
                        json.dumps(options)
                    ]) + '\n')
                    log_file.flush()
        log_file.close()
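# The distance statistics in the 'facenet' branch maintain running averages
# incrementally; a small self-contained restatement of that update rule
# (function and variable names are illustrative):
#
# def running_update(avg, sqr_avg, count, d):
#     """Fold a new sample d into a running mean and mean of squares."""
#     avg = avg * count / (count + 1) + d / (count + 1)
#     sqr_avg = sqr_avg * count / (count + 1) + d * d / (count + 1)
#     return avg, sqr_avg, count + 1
#
# Note: the 'stddev' computed at the end, sqr_avg - d_avg**2, is actually the
# variance E[d^2] - E[d]^2; taking math.sqrt of it would give the standard
# deviation.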
def age_gender():
    sample_url = url_for('uploaded_file', filename="sample.jpeg")
    if request.method == 'POST':
        if 'file' not in request.files:
            return render_template("face.html",
                                   error_msg="No file has been selected!")
        file = request.files['file']
        if file and allowed_file(file.filename):
            with tf.Graph().as_default():
                sess = tf.Session()
                with sess.as_default():
                    pnet, rnet, onet = mtcnn.create_mtcnn(sess, MTCNN_MODEL_PATH)

            filename = file.filename
            file_name = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(file_name)
            start = time.time()

            # Resize the original image to 680-width and save as JPEG
            cv_ori = cv2.imread(file_name)
            cv_resize = cv2.resize(
                cv_ori,
                dsize=(680, int(cv_ori.shape[0] / cv_ori.shape[1] * 680)),
                interpolation=cv2.INTER_CUBIC)
            cv2.imwrite(file_name, cv_resize)

            # Read the image for face detection
            img = misc.imread(file_name)
            cv_img = cv2.imread(file_name)

            # Detect faces and landmarks with MTCNN
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)

            # Crop the aligned faces for age and gender classification
            aligned_images = gender_age_predict.load_image(cv_img, landmarks)

            # Estimate gender and age of each face using ResNet50
            genders, ages = gender_age_predict.inception(FROZEN_GRAPH_PATH,
                                                         aligned_images)

            # Draw boxes and labels for faces
            gender_age_predict.draw_label(cv_img, bounding_boxes, genders, ages)

            save_path = os.path.join(app.config['NEW_FOLDER'], filename)
            cv2.imwrite(save_path, cv_img)
            end = time.time()
            print('\n Evaluation time: {:.3f}s\n'.format(end - start))

            file_url = url_for('uploaded_file', filename=filename)
            return render_template("face.html", user_image=file_url, error_msg='')
        else:
            print('\n Incorrect upload image format.\n')
            return render_template("face.html", user_image=sample_url,
                                   error_msg='Incorrect image format!')
    return render_template("face.html", user_image=sample_url, error_msg='')
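# The view above is presumably wired into a Flask app along these lines; a
# hedged sketch (route path, folders, and extension set are assumptions, not
# from the source):
#
# app = Flask(__name__)
# app.config['UPLOAD_FOLDER'] = './uploads'
# app.config['NEW_FOLDER'] = './results'
#
# ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
#
# def allowed_file(filename):
#     return ('.' in filename and
#             filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)
#
# app.add_url_rule('/age_gender', 'age_gender', age_gender,
#                  methods=['GET', 'POST'])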
face_size = 2 * 2 * conv4_face_out

# fc layer
fc_eye_size = 128
fc_face_size = 128
fc_face_mask_size = 256
face_face_mask_size = 128
fc_size = 128
fc2_size = 2

pre_data_mean = 0
pred_values = None

sess = tf.Session()
pnet, rnet, onet = detect_face.create_mtcnn(sess, "det/")

# def disp_img(img, x=None, y=None, px=None, py=None):
#     # img = train_eye_left[0]
#     r, g, b = cv2.split(img)
#     img = cv2.merge([b, g, r])
#     w, h, _ = img.shape
#     cx, cy = int(w / 2.0), int(h / 2.0)
#     cv2.line(img, (cx, cy), (cx + x, cy + y), (0, 0, 255), 3)
#     cv2.line(img, (cx, cy), (cx + px, cy + py), (255, 0, 0), 3)
#
#     cv2.imshow("image", img)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()
def convert_to_aligned_face(data_set, base_path, dataset_name):
    file_name = data_set.file_name
    genders = data_set.gender
    ages = data_set.age
    face_score = data_set.score
    num_images = data_set.shape[0]

    if dataset_name == "imdb":
        data_base_dir = os.path.join(base_path, "imdb_crop")
    elif dataset_name == "wiki":
        data_base_dir = os.path.join(base_path, "wiki_crop")
    else:
        raise NameError("unsupported dataset name: " + dataset_name)

    # load the MTCNN face detector
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = mtcnn.create_mtcnn(sess, MTCNN_MODEL_PATH)

    error_count = 0
    write_count = 0
    for index in range(num_images):
        # skip low-quality detections, out-of-range ages, missing gender labels
        if face_score[index] < 0.75:
            continue
        if not (0 <= ages[index] <= 100):
            continue
        if np.isnan(genders[index]):
            continue
        try:
            # Read the image for face detection
            img = misc.imread(
                os.path.join(data_base_dir, str(file_name[index][0])))
            cv_img = cv2.imread(
                os.path.join(data_base_dir, str(file_name[index][0])),
                cv2.IMREAD_COLOR)

            # Detect faces for age and gender classification
            bounding_boxes, landmarks = mtcnn.detect_face(
                img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
            # keep only images with exactly one detected face
            if bounding_boxes.shape[0] != 1:
                continue

            # Crop the aligned face from the image
            aligned_faces = load_image(cv_img, landmarks)
            face = aligned_faces[0]

            # Write the face image to the output path
            output_dir = os.getcwd() + '/faces/' + dataset_name + '/'
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
            image_name = 'image{}_{}_{}.jpg'.format(
                index + 70000, int(genders[index]), ages[index])
            output_path = output_dir + image_name
            cv2.imwrite(output_path, face)
            write_count = write_count + 1
        except Exception:
            # some files listed in the metadata do not exist in the data dir
            error_count = error_count + 1
            print("read {} error".format(index + 1))

    print("There are", error_count, "missing pictures")
    print("Found", write_count, "valid faces")
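# Hedged usage sketch (paths and the metadata loader are assumptions): the
# function expects a dataset object exposing file_name, gender, age, score,
# and shape, e.g. a table built from the IMDB-WIKI .mat metadata.
#
# data_set = load_imdb_meta('../data/imdb_crop/imdb.mat')  # hypothetical helper
# convert_to_aligned_face(data_set, base_path='../data', dataset_name='imdb')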