def main():
    detector = MtcnnDetector(model_folder='./mtcnn/model',
                             ctx=mx.cpu(),
                             num_worker=4,
                             accurate_landmark=True)
    for i in os.listdir(path1):
        if i in os.listdir(path2):
            continue
        os.mkdir(path2 + str(i))
        for j in os.listdir(path1 + str(i)):
            img = cv2.imread(path1 + str(i) + "/" + j, 1)
            results = detector.detect_face(img)
            if results is None:
                continue
            total_boxes = results[0]
            points = results[1]
            for id in range(len(points)):
                point = points[id].reshape((2, 5)).T
                nimg = preprocess(img, total_boxes[id], point, image_size='112,112')
                cv2.imwrite(path2 + str(i) + "/" + str(j) + "_" + str(id) + ".jpg",
                            nimg)
def get_mtccn_faces(args, ctx, image):
    # det_threshold = [0.6, 0.7, 0.8]
    det_threshold = [0.0, 0.1, 0.2]
    mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
    aligned_faces = []
    detector = MtcnnDetector(model_folder=mtcnn_path,
                             ctx=ctx,
                             num_worker=1,
                             accurate_landmark=True,
                             threshold=det_threshold)
    print('Input shape: {}'.format(image.shape))
    ret = detector.detect_face(image, det_type=args.det)
    if ret is None:
        return None
    bboxes, points = ret
    if bboxes.shape[0] == 0:
        return None
    for index, bbox in enumerate(bboxes):
        point = points[index]
        point = point.reshape((2, 5)).T
        nimg = face_preprocess.preprocess(image, bbox, point, image_size='112,112')
        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
        # cv2.imshow('window', nimg)
        # cv2.waitKey(0)
        aligned = np.transpose(nimg, (2, 0, 1))
        aligned_faces.append(aligned)
    return aligned_faces, bboxes
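A minimal caller sketch for the function above, assuming `args` only needs a `det` attribute here (0 selects full detection) and using a placeholder image path:

# Hedged caller sketch: args.det = 0 requests full detection; 'group.jpg' is a
# placeholder path. The function returns None when nothing is detected.
import types

args = types.SimpleNamespace(det=0)
image = cv2.imread('group.jpg')
result = get_mtccn_faces(args, mx.cpu(), image)
if result is not None:
    aligned_faces, bboxes = result
    print('{} aligned face(s), each a 3x112x112 array'.format(len(aligned_faces)))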
def detect_face(image_path):
    # time_start = time.time()
    tiny_face_path = './result/tiny_face/'
    if not os.path.exists(tiny_face_path):
        os.makedirs(tiny_face_path)
    else:
        clean(tiny_face_path)
    detector = MtcnnDetector(model_folder='model',
                             ctx=mx.cpu(0),
                             num_worker=4,
                             accurate_landmark=False)
    print('detector', detector)
    img = cv2.imread(image_path)
    # print('img:', img)

    # run detector
    results = detector.detect_face(img)
    if results is not None:
        total_boxes = results[0]
        points = results[1]

        # extract aligned face chips
        chips = detector.extract_image_chips(img, points, 160, 0.37)
        for i, chip in enumerate(chips):
            cv2.imwrite(tiny_face_path + 'chip_' + str(i) + '.jpg', chip)
def main():
    detector = MtcnnDetector(model_folder='model',
                             ctx=mx.cpu(0),
                             num_worker=1,
                             accurate_landmark=True)
    img = cv2.imread('test2.jpg')

    # run detector
    results = detector.detect_face(img)
    if results is not None:
        total_boxes = results[0]
        points = results[1]

        # extract aligned face chips
        chips = detector.extract_image_chips(img, points, 128, 0.37)
        for i, chip in enumerate(chips):
            cv2.imshow('chip_' + str(i), chip)
            cv2.imwrite('chip_' + str(i) + '.png', chip)

        draw = img.copy()
        for b in total_boxes:
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (255, 255, 255))
        for p in points:
            for i in range(5):
                cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

        cv2.imshow("detection result", draw)
        cv2.waitKey(0)
class MyVideoCapture:
    def __init__(self, video_source):
        # Open the video source
        self.detector = MtcnnDetector(model_folder='model',
                                      ctx=mx.cpu(0),
                                      num_worker=4,
                                      accurate_landmark=False)
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)

    def get_frame(self):
        if self.vid.isOpened():
            ret, frame = self.vid.read()
            if not ret:
                return (ret, None)
            img = cv2.resize(frame, (650, 500))
            (h, w) = img.shape[:2]
            results = self.detector.detect_face(img)
            if results is not None:
                total_boxes = results[0]
                points = results[1]
                global count
                count = 0
                draw = img.copy()
                for b in total_boxes:
                    cv2.rectangle(draw, (int(b[0]), int(b[1])),
                                  (int(b[2]), int(b[3])), (255, 255, 255))
                    x1 = int(b[0])
                    y1 = int(b[1])
                    x2 = int(b[2])
                    y2 = int(b[3])
                    centroid = (int((x1 + x2) / 2), int((y1 + y2) / 2))
                    if centroid[0] >= 60 and centroid[0] <= w - 60:
                        count += 1
                        cv2.circle(draw, centroid, 4, (0, 255, 0), -1)
                print("no_face:", count)
                cv2.putText(draw, "count = " + str(count), (0, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                img = draw
            # Return a boolean success flag and the current frame converted to RGB
            return (ret, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        else:
            return (False, None)

    # Release the video source when the object is destroyed
    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()
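A minimal usage sketch for the class above (the video path is a placeholder; get_frame returns an RGB frame annotated with the running face count):

# Hedged usage sketch; 'clip.mp4' is a placeholder video source.
cap = MyVideoCapture('clip.mp4')
while True:
    ok, rgb_frame = cap.get_frame()
    if not ok:
        break  # end of stream
del cap  # releases the capture via __del__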
class Handler:
    def __init__(self, prefix, epoch, ctx_id=0):
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        # model = mx.mod.Module(symbol=sym, context=ctx)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        mtcnn_path = os.path.join(os.path.dirname(__file__), '..', 'deploy',
                                  'mtcnn-model')
        self.det_threshold = [0.6, 0.7, 0.8]
        self.detector = MtcnnDetector(model_folder=mtcnn_path,
                                      ctx=ctx,
                                      num_worker=1,
                                      accurate_landmark=True,
                                      threshold=self.det_threshold)

    def get(self, img):
        ret = self.detector.detect_face(img, det_type=0)
        if ret is None:
            return None
        bbox, points = ret
        if bbox.shape[0] == 0:
            return None
        bbox = bbox[0, 0:4]
        points = points[0, :].reshape((2, 5)).T
        M = img_helper.estimate_trans_bbox(bbox, self.image_size[0], s=2.0)
        rimg = cv2.warpAffine(img, M, self.image_size, borderValue=0.0)
        img = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))  # 3*128*128, RGB
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
                              dtype=np.uint8)
        input_blob[0] = img
        ta = datetime.datetime.now()
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]
        tb = datetime.datetime.now()
        print('module time cost', (tb - ta).total_seconds())
        ret = np.zeros((alabel.shape[0], 2), dtype=np.float32)
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            # ret[i] = (ind[0], ind[1])  # h, w
            ret[i] = (ind[1], ind[0])  # w, h
        return ret, M
def find_pts(img, verbose=0, is_show=False, is_rect=False, is_normalize=False):
    # img = imread(pd_face.iloc[9]['path'])
    detector = MtcnnDetector(model_folder='model',
                             ctx=mx.cpu(0),
                             num_worker=4,
                             accurate_landmark=False)
    results = detector.detect_face(img)
    if results is not None:
        total_boxes = results[0]
        pts = results[1]
        print(pts)
        draw = img.copy()
        if verbose > 0:
            print(f"image shape: {str(img.shape)}")
        # print("Number of faces detected: {}".format(len(dets)))
        for i in range(len(total_boxes)):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                i, total_boxes[i][0], total_boxes[i][1], total_boxes[i][2],
                total_boxes[i][3]))
            print(f"aspect_ratio:{(total_boxes[i][3] - total_boxes[i][1]) / (total_boxes[i][2] - total_boxes[i][0])}")
            if is_rect:
                img = cv2.rectangle(draw,
                                    (int(total_boxes[i][0]), int(total_boxes[i][1])),
                                    (int(total_boxes[i][2]), int(total_boxes[i][3])),
                                    (255, 0, 0), 1)

        # shape = predictor(img, face)
        # use the 5 landmark points of the first face
        for i in range(len(pts)):
            if is_rect:
                for j in range(5):
                    cv2.circle(draw, (int(pts[i][j]), int(pts[i][j + 5])), 1,
                               (0, 0, 255), 2)
            # for i in [17, 21, 22, 26, 30, 36, 39, 41, 42, 46, 47, 49, 52, 55, 58]:  # range(68):
            #     if is_rect:
            #         img = cv.circle(img.copy(), (shape.part(i).x, shape.part(i).y), 1, (0, 0, 255), 1)
            #         # cv.putText(img, str(i), (shape.part(i).x, shape.part(i).y), cv.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            #     x = shape.part(i).x
            #     y = shape.part(i).y
            #     # print(f"{x}, {y}")
            #     if is_normalize:
            #         x = shape.part(i).x / img.shape[0]
            #         y = shape.part(i).y / img.shape[1]
            #     pts.append(x)
            #     pts.append(y)
            break

        if is_show:
            cv2.imshow('image', draw)
            cv2.waitKey(0)
        return pts
def detect_face(img, img_size):
    detector = MtcnnDetector(model_folder='./model',
                             ctx=mx.cpu(0),
                             num_worker=1,
                             accurate_landmark=False)
    results = detector.detect_face(img)
    if results is None:
        return 0
    total_boxes = results[0]
    points = results[1]
    face_crops = detector.extract_image_chips(img, points, img_size, 0.37)
    '''
    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (255, 255, 255))
    for p in points:
        for i in range(5):
            cv2.circle(draw, (p[i], p[i + 5]), 1, (255, 0, 0), 2)
    '''
    return face_crops, total_boxes
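Note that this wrapper returns the bare integer 0 on a miss but a tuple on a hit, so callers should guard before unpacking; a minimal caller sketch with a placeholder image path:

# Hedged caller sketch: the wrapper returns 0 (not None) when no face is found.
img = cv2.imread('test.jpg')  # placeholder path
result = detect_face(img, 112)
if result == 0:
    print('no face found')
else:
    face_crops, total_boxes = result
    print('found {} face(s)'.format(len(face_crops)))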
class DetectorModel:
    def __init__(self, args):
        ctx = mx.cpu() if args.gpu == -1 else mx.gpu(args.gpu)
        mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        self.max_face_number = args.max_face_number
        self.face_counter = 0
        self.detector = MtcnnDetector(model_folder=mtcnn_path,
                                      ctx=ctx,
                                      num_worker=1,
                                      minsize=args.mtcnn_minsize,
                                      factor=args.mtcnn_factor,
                                      accurate_landmark=True,
                                      threshold=args.mtcnn_threshold)

    def get_all_boxes(self, face_img, save_img=False):
        face_num = self.max_face_number
        ret = self.detector.detect_face(face_img, det_type=0)
        if ret is None:
            return []
        bbox, points = ret
        # sort faces left-to-right by the x coordinate of the bounding box
        sorted_index = bbox[:, 0].argsort()
        bbox = bbox[sorted_index]
        points = points[sorted_index]

        aligned = []
        for index in range(0, len(bbox[:face_num])):
            item_bbox = bbox[index, 0:4]
            item_points = points[index, :].reshape((2, 5)).T
            nimg = face_preprocess.preprocess(face_img,
                                              item_bbox,
                                              item_points,
                                              image_size='112,112')
            if save_img:
                cv2.imwrite(
                    './Temp/{}-{}.jpg'.format(time.time(), self.face_counter),
                    nimg)
                self.face_counter += 1
            aligned.append(nimg)
        return zip(aligned, bbox)
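get_all_boxes returns a zip object, which is a one-shot iterator in Python 3; a minimal caller sketch in which the namespace fields simply mirror what the constructor reads (all values are placeholders):

# Hedged usage sketch: the args values are placeholders; materialize the zip
# with list() if you need to iterate over the results more than once.
import types

args = types.SimpleNamespace(gpu=-1, max_face_number=4, mtcnn_minsize=20,
                             mtcnn_factor=0.709, mtcnn_threshold=[0.6, 0.7, 0.8])
model = DetectorModel(args)
faces = list(model.get_all_boxes(cv2.imread('group.jpg')))
for aligned_face, box in faces:
    print('face at x1={:.0f}, y1={:.0f}'.format(box[0], box[1]))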
class MTCNN(object):
    def __init__(self,
                 min_face_size: float = 20.,
                 factor: float = 0.709,
                 threshold: List[float] = [0.6, 0.7, 0.8]):
        self.device = mx.cpu() if len(
            mx.test_utils.list_gpus()) == 0 else mx.gpu(0)
        self.min_face_size = min_face_size
        self.factor = factor
        self.threshold = threshold
        self.model_path = os.path.realpath(
            os.path.join(current_dir, 'mtcnn_model'))
        self.model = MtcnnDetector(model_folder=self.model_path,
                                   ctx=self.device,
                                   num_worker=1,
                                   accurate_landmark=True,
                                   threshold=self.threshold,
                                   minsize=self.min_face_size,
                                   factor=self.factor)

    def align(self, image: np.ndarray) -> Tuple[List[Any], List[Any], List[Any]]:
        ret = self.model.detect_face(image, det_type=0)
        if ret is None:
            return [], [], []
        bounding_boxes, landmarks = ret
        if bounding_boxes.shape[0] == 0:
            return [], [], []
        # 5-point reference template for a 112x112 aligned face crop
        reference_facial_points = np.array(
            [[30.29459953, 51.69630051], [65.53179932, 51.50139999],
             [48.02519989, 71.73660278], [33.54930115, 92.3655014],
             [62.72990036, 92.20410156]],
            dtype=np.float32)
        reference_facial_points[:, 0] += 8.
        transform = SimilarityTransform()
        faces = []
        for landmark in landmarks:
            tmp_landmark = np.array(landmark, dtype=np.float32).reshape((2, 5)).T
            transform.estimate(tmp_landmark, reference_facial_points)
            M = transform.params[0:2, :]
            warped_face = cv2.warpAffine(image, M, (112, 112), borderValue=0.0)
            faces.append(warped_face)
        return bounding_boxes, landmarks, faces
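A minimal usage sketch for the wrapper above (it assumes SimilarityTransform comes from skimage.transform and uses a placeholder image path); each returned face is a 112x112 crop aligned to the 5-point reference template:

# Hedged usage sketch; 'portrait.jpg' is a placeholder path.
mtcnn = MTCNN(min_face_size=40.)
boxes, landmarks, faces = mtcnn.align(cv2.imread('portrait.jpg'))
for i, face in enumerate(faces):
    cv2.imwrite('aligned_{}.jpg'.format(i), face)  # 112x112 aligned crop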
def face_to_file(dest, task):
    # needs at least three GPUs; uses the second and third GPU
    detector = MtcnnDetector(
        model_folder='model',
        ctx=mx.cpu(0),
        # ctx=mx.gpu(int(task[0] / 2) + 1),
        num_worker=4,
        accurate_landmark=False)
    for file in task[1]:
        path_component = os.path.normpath(file).split(os.path.sep)
        for i, c in enumerate(path_component):
            if c == '..':
                path_component[i] = ''
        try:
            path_component.remove('')
        except:
            pass
        path_component[0] = dest
        basename = os.path.basename(file)
        jpg_dest = os.path.join(*path_component)
        makedir(jpg_dest)
        # command = 'ffmpeg -loglevel quiet -i {0} -vf fps=1 {1}/{2}_%03d.jpg'.format(
        #     file, jpg_dest, basename)
        img = cv2.imread(file)
        results = detector.detect_face(img)
        if results is not None:
            total_boxes = results[0]
            points = results[1]

            # extract aligned face chips
            chips = detector.extract_image_chips(img, points, 256, 0.37)
            for i, chip in enumerate(chips):
                cv2.imwrite('{0}/{1}_{2}.jpg'.format(jpg_dest, basename, i), chip)
        else:
            print('no face in ', file)
# coding: utf-8
import mxnet as mx
from mtcnn_detector import MtcnnDetector
import cv2
import os
import time

detector = MtcnnDetector(model_folder='model',
                         ctx=mx.cpu(0),
                         num_worker=4,
                         accurate_landmark=False)

img = cv2.imread('test2.jpg')

# run detector
results = detector.detect_face(img)

if results is not None:
    total_boxes = results[0]
    points = results[1]

    # extract aligned face chips
    chips = detector.extract_image_chips(img, points, 144, 0.37)
    for i, chip in enumerate(chips):
        cv2.imshow('chip_' + str(i), chip)
        cv2.imwrite('chip_' + str(i) + '.png', chip)

    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (255, 255, 255))
    for p in points:
        for i in range(5):
            cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)
def main():
    args = get_args()
    src_file = args.src
    if not os.path.isfile(src_file):
        raise ValueError("{} not exist".format(src_file))
    img = cv2.imread(src_file)
    input = io.imread(src_file)

    # Run the 3D face alignment on a test image, with CUDA enabled.
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                      enable_cuda=True,
                                      flip_input=False,
                                      model_path=args.model)
    detector = MtcnnDetector(model_folder=args.detmodel,
                             ctx=mx.gpu(0),
                             num_worker=1,
                             accurate_landmark=True)

    # run detector
    results = detector.detect_face(img)
    box = results[0][0]
    preds = fa.get_landmarks(input, box[1], box[3], box[0], box[2])[-1]

    # TODO: Make this nice
    fig = plt.figure(figsize=plt.figaspect(.5))
    ax = fig.add_subplot(1, 2, 1)
    ax.imshow(input)
    # 2D overlay: jaw, brows, nose, eyes, and mouth segments of the 68 points
    for start, end in [(0, 17), (17, 22), (22, 27), (27, 31), (31, 36),
                       (36, 42), (42, 48), (48, 60), (60, 68)]:
        ax.plot(preds[start:end, 0], preds[start:end, 1], marker='o',
                markersize=6, linestyle='-', color='w', lw=2)
    ax.axis('off')

    ax = fig.add_subplot(1, 2, 2, projection='3d')
    surf = ax.scatter(preds[:, 0], preds[:, 1], preds[:, 2],
                      c="cyan", alpha=1.0, edgecolor='b')
    # 3D polylines over the same facial segments
    for start, end in [(0, 17), (17, 22), (22, 27), (27, 31), (31, 36),
                       (36, 42), (42, 48), (48, 68)]:
        ax.plot3D(preds[start:end, 0], preds[start:end, 1], preds[start:end, 2],
                  color='blue')
    ax.view_init(elev=135., azim=90.)
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.show()
class VideoCamera(object):
    def __init__(self):
        # Using OpenCV to capture from device 0. If you have trouble capturing
        # from a webcam, comment the line below out and use a video file
        # instead.
        try:
            self.video = cv2.VideoCapture(0)
            self.detector = MtcnnDetector(model_folder='model',
                                          ctx=mx.cpu(),
                                          num_worker=4,
                                          accurate_landmark=False,
                                          threshold=[0.6, 0.7, 0.7])
            sym, arg_params, aux_params = mx.model.load_checkpoint(
                'model/model-r34-amf/model', 0)
            # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
            self.model = mx.mod.Module(symbol=sym,
                                       context=mx.cpu(),
                                       label_names=None)
            # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))],
            #            label_shapes=[('softmax_label', (args.batch_size,))])
            self.model.bind(data_shapes=[('data', (1, 3, 112, 112))])
            self.model.set_params(arg_params, aux_params)
            self.neigh = KNeighborsClassifier(n_neighbors=1)
            self.X = np.load('base.npy')
            self.names = []
            with open('base.txt', 'r') as f:
                for line in f:
                    line = line.replace("\n", "")
                    self.names.append(line)
            # print(self.names)
            y = np.arange(self.X.shape[0])
            # print(y)
            self.neigh.fit(self.X, y)
            os.system('cls')
            print('\n\n\n\n\n\t\t\t\tWHAT ARE YOUR COMMANDS?'
                  '\n\n\n\n\n\t\t\tif you want to exit watching, please press \'q\'!')
        except:
            exit()

        # If you decide to use video.mp4, you must have this file in the same
        # folder as main.py.
        # self.video = cv2.VideoCapture('video.mp4')

    def __del__(self):
        self.video.release()

    # def fun(self, model, img):
    #     input_blob = np.expand_dims(img, axis=0)
    #     data = mx.nd.array(input_blob)
    #     db = mx.io.DataBatch(data=(data,))
    #     self.model.forward(db, is_train=False)
    #     embedding = self.model.get_outputs()[0].asnumpy()
    #     embedding = sklearn.preprocessing.normalize(embedding)
    #     return embedding

    def get_frame(self):
        success, image = self.video.read()
        # We are using Motion JPEG, but OpenCV defaults to capturing raw images,
        # so we must encode them into JPEG in order to correctly display the
        # video stream.
        results = self.detector.detect_face(image)
        if results is not None:
            total_boxes = results[0]
            points = results[1]
            draw = image
            b = total_boxes[0, 0:4]
            p = points[0, :].reshape((2, 5)).T
            nimg = face_preprocess.preprocess(image, b, p, image_size='112,112')
            nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)  # ???
            img = np.transpose(nimg, (2, 0, 1))
            input_blob = np.expand_dims(img, axis=0)
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            embedding = self.model.get_outputs()[0].asnumpy()
            em = sklearn.preprocessing.normalize(embedding)
            k = self.neigh.predict(em)[0]
            # mind = 2
            name = 'unknown'
            rgb = (255, 0, 0)
            if np.linalg.norm(self.X[k] - em[0]) < 1.24:
                name = self.names[k]
                rgb = (0, 255, 255)
            else:
                name = 'unknown'
            # for k in range(4):
            #     di = np.linalg.norm(self.X[k] - em[0])
            #     if di < 1.24:
            #         name = self.names[k]
            #         rgb = (0, 255, 255)
            #     # else:
            #     #     name = 'unknown'
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          rgb)
            cv2.putText(draw, name, (int(b[0]), int(b[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
            image = draw
        cv2.imshow('capture', image)
def main():
    # load video
    VideoPath = "../videos/3.mp4"
    imagelist = Video2list(VideoPath)

    # face detection
    mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
    detector = MtcnnDetector(model_folder=mtcnn_path,
                             ctx=mx.cpu(0),
                             num_worker=1,
                             minsize=80,
                             accurate_landmark=True,
                             threshold=[0.6, 0.7, 0.9])
    Videoimginfo = []
    for img in imagelist:
        ret = detector.detect_face(img)
        Videoimginfo.append(ret)

    # face feature extraction
    model = MobileFaceNet(512)
    model_static_cnn = torch.load("model_mobilefacenet.pth",
                                  map_location=lambda storage, loc: storage)
    net_model_static_cnn = {}
    for k, v in model_static_cnn.items():
        if k == "fc2.weight":
            continue
        if k == "fc2.bias":
            continue
        net_model_static_cnn[k] = v
    model.load_state_dict(net_model_static_cnn)
    model.eval()

    imageinfo = []
    allFaceFeture = []
    for item in range(len(imagelist)):
        if Videoimginfo[item] is not None:
            image = imagelist[item]
            ret = Videoimginfo[item]
            facefeature = Facefeature(ret, image, model)
            imageinfo.append(len(facefeature[0]))
            allFaceFeture += facefeature[0]
            Videoimginfo[item] = [facefeature[1], facefeature[2]]
        else:
            imageinfo.append(0)
    Facecalsslist, classnum = dbscan(np.array(allFaceFeture), distance, minPt)
    print(Facecalsslist, classnum)

    # write annotated frames back to a video
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    videoWrite = cv2.VideoWriter('output.avi', fourcc, 25,
                                 (imagelist[0].shape[1], imagelist[0].shape[0]))
    font = cv2.FONT_HERSHEY_SIMPLEX
    cc = 0
    flag = 0
    for item in range(len(imageinfo)):
        img = imagelist[item]
        if imageinfo[item] == 0:
            videoWrite.write(img)
            cv2.imwrite("./ll/%d.jpg" % cc, img)
        else:
            # one frame may contain more than one face:
            # draw rectangle, landmark points, and cluster label
            bbox, point = Videoimginfo[item]
            for q in range(len(point)):
                for i in range(5):
                    cv2.circle(img, (int(point[q][i]), int(point[q][i + 5])), 3,
                               (0, 255, 0), -1)
                cv2.rectangle(img, (int(bbox[q][0]), int(bbox[q][1])),
                              (int(bbox[q][2]), int(bbox[q][3])), (0, 255, 255), 2)
                cv2.putText(img, "%d" % Facecalsslist[flag],
                            (int(bbox[q][0]), int(bbox[q][1])), font, 1.2,
                            (255, 255, 255), 2)
                flag += 1
            cv2.imwrite("./ll/%d.jpg" % cc, img)
            videoWrite.write(img)
        cc += 1
    videoWrite.release()
    return logits

# --------------------------- end of network ---------------------------
logits = CNNlayer()
predict = tf.argmax(logits, 1)

saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess, 'recog_model/faces.ckpt-5')

user = input("Image (I) or camera (V)?: ")
if user == "I":
    path = input("Image path: ")
    full_img = cv2.imread(path)
    results = detector.detect_face(full_img)
    if results is not None:
        total_boxes = results[0]
        points = results[1]

        # draw face bounding boxes
        draw = full_img.copy()
        for b in total_boxes:
            cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (255, 255, 255))
        for p in points:
            for i in range(5):
                cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)
        cv2.imshow("detection result", draw)

        # extract aligned face chips
        chips = detector.extract_image_chips(full_img, points, 144, 0.37)
        for chip in chips:
def main(img_list_fn, save_dir, save_img=True, show_img=False):
    minsize = 20
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    scale_factor = 0.709

    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    fp_rlt = open(osp.join(save_dir, 'mtcnn_fd_rlt.json'), 'w')
    result_list = []

    t1 = time.clock()
    detector = MtcnnDetector(caffe_model_path)
    t2 = time.clock()
    print("initFaceDetector() costs %f seconds" % (t2 - t1))

    fp = open(img_list_fn, 'r')
    ttl_time = 0.0
    img_cnt = 0
    for line in fp:
        imgpath = line.strip()
        print("\n===>" + imgpath)
        if imgpath == '':
            print('empty line, not a file name, skip to next')
            continue
        if imgpath[0] == '#':
            print('skip line starting with #, skip to next')
            continue

        rlt = {}
        rlt["filename"] = imgpath
        rlt["faces"] = []
        rlt['face_count'] = 0

        try:
            img = cv2.imread(imgpath)
        except:
            print('failed to load image: ' + imgpath)
            rlt["message"] = "failed to load"
            result_list.append(rlt)
            continue
        if img is None:
            print('failed to load image: ' + imgpath)
            rlt["message"] = "failed to load"
            result_list.append(rlt)
            continue

        img_cnt += 1
        t1 = time.clock()
        bboxes, points = detector.detect_face(img, minsize, threshold,
                                              scale_factor)
        t2 = time.clock()
        ttl_time += t2 - t1
        print("detect_face() costs %f seconds" % (t2 - t1))

        if bboxes is not None and len(bboxes) > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = len(bboxes)
        rlt['message'] = 'success'
        result_list.append(rlt)

        # print('output bboxes: ' + str(bboxes))
        # print('output points: ' + str(points))
        # toc()

        print("\n===> Processed %d images, costs %f seconds, avg time: %f seconds"
              % (img_cnt, ttl_time, ttl_time / img_cnt))

        if bboxes is None:
            continue

        if save_img or show_img:
            draw_faces(img, bboxes, points)
        if save_img:
            save_name = osp.join(save_dir, osp.basename(imgpath))
            cv2.imwrite(save_name, img)
        if show_img:
            cv2.imshow('img', img)
            ch = cv2.waitKey(0) & 0xFF
            if ch == 27:
                break

    json.dump(result_list, fp_rlt, indent=4)
    fp_rlt.close()
    fp.close()
    if show_img:
        cv2.destroyAllWindows()
class Filter:
    def __init__(self, img_path, log_path, meta_path, purges=True, only_one=False):
        self.detector = MtcnnDetector(model_folder='model',
                                      ctx=mx.gpu(0),
                                      num_worker=WORKER_COUNT,
                                      accurate_landmark=False)
        self.img_path = img_path
        self.success_logs_path = os.path.join(log_path, 'successes/')
        self.folders = [x for x in glob(img_path + "*")]
        self.log_path = log_path
        self.file_log = os.path.join(log_path, 'lists/')
        data_log = os.path.join(log_path, 'bboxes/')
        if purges:
            _purge(data_log, 'path')
        self.b_log = os.path.join(data_log, 'b/')  # bbox
        self.p_log = os.path.join(data_log, 'p/')  # facial points
        self.save_path = os.path.join(log_path, 'imgs/')

        # meta
        self.meta_path = meta_path
        self.meta_records = os.path.join(meta_path, 'records/')
        self.meta_rois = os.path.join(meta_path, 'rois/')
        self.only_one_mode = only_one

        if os.path.isdir(self.meta_records) and os.path.isdir(self.meta_rois):
            # validate file structure
            pass
        else:
            raise Exception('meta record broken.')

        # reset logs
        if purges:
            _purge(self.save_path, 'path')
            _purge(self.file_log, 'path')
            _purge(self.b_log, 'path')
            _purge(self.p_log, 'path')

    def _intersect_ratio(self, box, rois):
        """Calculate the max intersect ratio between a box and a list of
        candidate rois, relative to the box, i.e. area(overlap) / area(box).

        Note: https://stackoverflow.com/questions/27152904

        :param box: tuple, format: x1, y1, x2, y2.
        :param rois: list, a list of rois. Same format as box.
        :return: float, a number between 0 and 1.
        """
        res = 0
        assert (box[2] >= box[0])
        assert (box[3] >= box[1])
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        if box_area == 0:
            return 0
        for roi in rois:
            assert (roi[2] >= roi[0])
            assert (roi[3] >= roi[1])
            dx = min(roi[2], box[2]) - max(roi[0], box[0])
            dy = min(roi[3], box[3]) - max(roi[1], box[1])
            if (dx >= 0) and (dy >= 0):
                res = max(res, (dx * dy) / box_area)
        assert (res >= 0)
        assert (res <= 1)
        return res

    def _detect_img(self, img_path, img_id, save_paths, file_log, b_log, p_log,
                    rejection_log, rois, min_confidence=0.9, min_resolution=24):
        print('Detecting:', img_path)
        img = cv2.imread(img_path)

        # run detector
        results = self.detector.detect_face(img)

        # filter out low resolutions: increases the next step's CNN accuracy
        # and decreases this step's false positives
        total_boxes = []
        points = []
        if results is not None:
            for i, b in enumerate(results[0]):
                # check resolution, confidence, and intersect ratio against rcnn meta.
                if (b[2] - b[0] >= min_resolution) and (b[3] - b[1] >= min_resolution) and \
                        (b[4] >= min_confidence) and \
                        (self._intersect_ratio(b, rois) >= 0.7):
                    total_boxes.append(b)
                    points.append(results[1][i])
                else:
                    # rejected by filter
                    # write format: img_id, rejection code (fail on filter: F), x1, y1, x2, y2, confidence.
                    _log_one_line(
                        rejection_log,
                        '{} {} {} {} {} {} {}'.format(img_id, 'F', b[0], b[1],
                                                      b[2], b[3], b[4]))

        if (results is None) or (len(total_boxes) == 0):
            _log_one_line(file_log, '{} 0'.format(img_id))
            return

        # extract aligned face chips
        chips = self.detector.extract_image_chips(img, points, 144, 0.37)
        face_num = len(chips)
        _log_one_line(file_log, '{} {}'.format(img_id, face_num))

        if self.only_one_mode:
            if face_num != 1:
                # More than one face: log as failures and record no image for
                # this file. Note: since we have plenty of data and want to
                # reduce noise as much as possible, recording less noise
                # matters more than recording more data.
                for i, b in enumerate(results[0]):
                    # log as failure code M
                    _log_one_line(
                        rejection_log,
                        '{} {} {} {} {} {} {}'.format(img_id, 'M', b[0], b[1],
                                                      b[2], b[3], b[4]))
                return  # end operation
            save_path = save_paths[0]
            cv2.imwrite(os.path.join(save_path, '{}.jpg'.format(img_id)),
                        chips[0])
        else:
            assert (len(save_paths) == 3)
            if face_num == 1:
                save_path = save_paths[0]
            elif face_num == 2:
                save_path = save_paths[1]
            else:
                save_path = save_paths[2]
            for ind, chip in enumerate(chips):
                cv2.imwrite(
                    os.path.join(save_path, '{}_{}.jpg'.format(img_id, ind)),
                    chip)

        # write boxes
        for ind, b in enumerate(total_boxes):
            # write format: img_id, face index, x1, y1, x2, y2, confidence.
            _log_one_line(
                b_log, '{} {} {} {} {} {} {}'.format(img_id, ind, b[0], b[1],
                                                     b[2], b[3], b[4]))
        for ind, p in enumerate(points):
            for i in range(5):
                # write format: img_id, face index, point index, x, y
                _log_one_line(
                    p_log,
                    '{} {} {} {} {}'.format(img_id, ind, i, p[i], p[i + 5]))

    def write_all(self):
        # some extra logging info
        _total = len(self.folders)
        _count = 0
        for folder in self.folders:
            category_id = folder.split('/')[-1]
            big_ass_warning('CATEGORY: {}, PROGRESS: {:.4f}%'.format(
                category_id, float(_count) * 100 / _total))

            # load meta
            _meta_path = os.path.join(self.meta_rois,
                                      '{}.txt'.format(category_id))
            if not os.path.isfile(_meta_path):
                # meta may be missing because no person is in the category
                continue
            with open(_meta_path) as _f:
                _lines = _f.readlines()
            records = {}  # meta key: image_id, value: roi
            for line in _lines:
                _raw = line.strip().split()
                assert (len(_raw) == 6)
                _img_id = _raw[0]
                roi = [
                    int(_raw[2]),
                    int(_raw[3]),
                    int(_raw[4]) + int(_raw[2]),
                    int(_raw[5]) + int(_raw[3]),
                ]  # x1, y1, x2, y2
                if _img_id in records:
                    records[_img_id].append(roi)
                else:
                    records[_img_id] = [roi]

            # build log system
            file_log = os.path.join(self.file_log, '{}.txt'.format(category_id))
            _purge(file_log, 'file')
            c_b_log = os.path.join(self.b_log, '{}.txt'.format(category_id))
            _purge(c_b_log, 'file')
            c_p_log = os.path.join(self.p_log, '{}.txt'.format(category_id))
            _purge(c_p_log, 'file')
            c_rj_log = os.path.join(self.b_log, 'r{}.txt'.format(category_id))
            _purge(c_rj_log, 'file')
            save_path = os.path.join(self.save_path, '{}/'.format(category_id))
            _purge(save_path, 'path')

            save_paths = []
            if self.only_one_mode:
                save_paths = [save_path]
            else:
                for i in range(3):  # three tiers for the number of people.
                    p = os.path.join(save_path, '{}/'.format(i + 1))
                    _purge(p, 'path')
                    save_paths.append(p)

            # write data
            for file in os.listdir(folder):
                if file.endswith(".jpg"):
                    _path = os.path.join(folder, file)
                    img_id = str(int(
                        file.split('/')[-1].split('.')[0])).zfill(6)
                    if img_id not in records:
                        # zero persons in image, based on meta data
                        continue
                    rois = records[img_id]
                    # Process if only-one mode is enabled and there is exactly
                    # one roi, or if the mode is not enabled.
                    if (not self.only_one_mode) or (len(rois) == 1):
                        self._detect_img(img_path=_path,
                                         img_id=img_id,
                                         save_paths=save_paths,
                                         file_log=file_log,
                                         b_log=c_b_log,
                                         p_log=c_p_log,
                                         rejection_log=c_rj_log,
                                         rois=rois)
            _count += 1
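A quick sanity check of _intersect_ratio under the x1, y1, x2, y2 convention its asserts imply; the method touches no instance state, so it can be exercised without constructing a Filter (illustrative numbers):

# Illustrative check (assumes Python 3 for the direct call and true division):
# a 10x10 box half covered by a single roi should score 0.5.
ratio = Filter._intersect_ratio(None, (0., 0., 10., 10.), [(0., 0., 5., 10.)])
assert abs(ratio - 0.5) < 1e-9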
def main(lfw_list_fn, lfw_root, save_dir, save_img=False, show_img=False):
    minsize = 20
    caffe_model_path = "../../model"
    threshold = [0.6, 0.7, 0.7]
    scale_factor = 0.709

    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    fp_rlt = open(osp.join(save_dir, 'lfw_mtcnn_fd_rlt.json'), 'w')
    result_list = []  # still used by the failed-load paths below
    fp_rlt.write('[\n')

    t1 = time.clock()
    detector = MtcnnDetector(caffe_model_path)
    t2 = time.clock()
    print("initFaceDetector() costs %f seconds" % (t2 - t1))

    fp = open(lfw_list_fn, 'r')
    ttl_time = 0.0
    img_cnt = 0
    for line in fp:
        imgpath = line.strip()
        print("\n===>" + imgpath)
        if imgpath == '':
            print('empty line, not a file name, skip to next')
            continue
        if imgpath[0] == '#':
            print('skip line starting with #, skip to next')
            continue

        splits = imgpath.split()
        imgpath = splits[0]
        id = 'unknown' if len(splits) < 2 else splits[1]

        if not imgpath.startswith('/'):
            fullpath = osp.join(lfw_root, imgpath)
        else:
            fullpath = imgpath

        rlt = {}
        rlt["filename"] = imgpath
        rlt["faces"] = []
        rlt['face_count'] = 0
        rlt['id'] = id

        try:
            img = cv2.imread(fullpath)
        except:
            print('failed to load image: ' + fullpath)
            rlt["message"] = "failed to load"
            result_list.append(rlt)
            continue
        if img is None:
            print('failed to load image: ' + fullpath)
            rlt["message"] = "failed to load"
            result_list.append(rlt)
            continue

        img_cnt += 1
        t1 = time.clock()
        bboxes, points = detector.detect_face(img, minsize, threshold,
                                              scale_factor)
        t2 = time.clock()
        ttl_time += t2 - t1
        print("detect_face() costs %f seconds" % (t2 - t1))

        if len(bboxes) > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = len(bboxes)
        rlt['message'] = 'success'

        # result_list.append(rlt)
        s = json.dumps(rlt, indent=2)
        fp_rlt.write(s + ',\n')
        # fp_rlt.write(',\n' + s)

        # print('output bboxes: ' + str(bboxes))
        # print('output points: ' + str(points))
        # toc()

        if bboxes is None:
            continue

        print("\n===> Processed %d images, costs %f seconds, avg time: %f seconds"
              % (img_cnt, ttl_time, ttl_time / img_cnt))

        if save_img or show_img:
            draw_faces(img, bboxes, points)
        if save_img:
            save_name = osp.join(save_dir, osp.basename(imgpath))
            cv2.imwrite(save_name, img)
        if show_img:
            cv2.imshow('img', img)
            ch = cv2.waitKey(0) & 0xFF
            if ch == 27:
                break

    # json.dump(result_list, fp_rlt, indent=2)
    # print(fp_rlt.tell())

    # delete the trailing ','
    if sys.platform == 'win32':
        fp_rlt.seek(-3, 1)
    else:
        fp_rlt.seek(-2, 1)
    fp_rlt.write('\n]')

    fp_rlt.close()
    fp.close()
    if show_img:
        cv2.destroyAllWindows()
def alignMain(args):
    mkdirP(args.outputDir)

    imgs = list(iterImgs(args.inputDir))

    # Shuffle so multiple versions can be run at once.
    random.shuffle(imgs)

    align = MtcnnDetector(model_folder=mtcnn_dir + 'model',
                          ctx=mx.gpu(int(args.gpus.split(',')[0])),
                          num_worker=4,
                          minsize=50,
                          accurate_landmark=True)

    nFallbacks = 0
    for imgObject in imgs:
        print("=== {} ===".format(imgObject.path))
        outDir = os.path.join(args.outputDir, imgObject.cls)
        mkdirP(outDir)
        outputPrefix = os.path.join(outDir, imgObject.name)
        imgName = outputPrefix + "." + args.ext

        if os.path.isfile(imgName):
            if args.verbose:
                print("  + Already found, skipping.")
        else:
            bgr = imgObject.getBGR()
            outBgr = None
            if bgr is None:
                if args.verbose:
                    print("  + Unable to load.")
            else:
                detect = align.detect_face(bgr)
                if detect is not None:
                    bb = detect[0]
                    pts = detect[1]
                    if bb.shape[0] > 1:
                        # keep only the largest detected face
                        bb_size = (bb[:, 2] - bb[:, 0]) * (bb[:, 3] - bb[:, 1])
                        i_max = np.argmax(bb_size)
                        bb = bb[i_max:i_max + 1]
                        pts = pts[i_max:i_max + 1]
                    outBgr = align.extract_image_chips(bgr, pts, args.size,
                                                       args.pad)
                    outBgr = outBgr[0]
                else:
                    if args.verbose:
                        print("  + Unable to align.")

            if args.fallbackLfw and outBgr is None:
                nFallbacks += 1
                deepFunneled = "{}/{}.jpg".format(
                    os.path.join(args.fallbackLfw, imgObject.cls),
                    imgObject.name)
                shutil.copy(
                    deepFunneled,
                    "{}/{}.jpg".format(
                        os.path.join(args.outputDir, imgObject.cls),
                        imgObject.name))

            if outBgr is not None:
                # if args.verbose:
                #     print("  + Writing aligned file to disk.")
                # pdb.set_trace()
                cv2.imwrite(imgName, outBgr)

    if args.fallbackLfw:
        print('nFallbacks:', nFallbacks)
# coding: utf-8
import mxnet as mx
from mtcnn_detector import MtcnnDetector
import cv2
import os
import time

detector = MtcnnDetector(model_folder='model', ctx=mx.cpu(0))

img = cv2.imread('test2.jpg')

t1 = time.time()
results = detector.detect_face(img, False)
print('time: ', time.time() - t1)

if results is not None:
    total_boxes = results[0]
    points = results[1]

    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (255, 255, 255))
    for p in points:
        for i in range(5):
            cv2.circle(draw, (int(p[i]), int(p[i + 5])), 1, (0, 0, 255), 2)

    cv2.imshow("detection result", draw)
    cv2.waitKey(0)
def main(nsplits, split_id, list_file, img_root_dir, mtcnn_model_dir,
         save_dir=None):
    if not save_dir:
        save_dir = './aligned_root_dir'

    if not osp.exists(save_dir):
        print('mkdir for aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    save_aligned_dir = osp.join(save_dir, 'aligned_imgs')
    if not osp.exists(save_aligned_dir):
        print('mkdir for aligned/cropped face imgs: ', save_aligned_dir)
        os.makedirs(save_aligned_dir)

    save_rects_dir = osp.join(save_dir, 'face_rects')
    if not osp.exists(save_rects_dir):
        print('mkdir for face rects/landmarks: ', save_rects_dir)
        os.makedirs(save_rects_dir)

    detector = MtcnnDetector(mtcnn_model_dir)

    fp = open(list_file, 'r')
    all_lines = fp.readlines()
    fp.close()

    total_line_cnt = len(all_lines)
    print('--->%d imgs in total' % total_line_cnt)

    if nsplits < 2:
        if split_id > 0:
            print('===> Will only process first %d imgs' % split_id)
            start_line = 0
            end_line = split_id
        else:
            print('===> Will process all of the images')
            start_line = 0
            end_line = total_line_cnt
    else:
        assert (split_id < nsplits)
        lines_per_split = float(total_line_cnt) / nsplits
        start_line = int(lines_per_split * split_id)
        end_line = int(lines_per_split * (split_id + 1))
        if end_line + 1 >= total_line_cnt:
            end_line = total_line_cnt
        print('===> Will only process imgs in the range [%d, %d)' %
              (start_line, end_line))

    count = start_line
    for line in all_lines[start_line:end_line]:
        line = line.strip()
        print(count)
        count = count + 1

        img_fn = osp.join(img_root_dir, line)
        print('===> Processing img: ' + img_fn)
        img = cv2.imread(img_fn)
        ht = img.shape[0]
        wd = img.shape[1]
        print('image.shape:', img.shape)

        spl = osp.split(line)
        sub_dir = osp.split(spl[0])[1]
        print('sub_dir: ', sub_dir)
        if CHINESE_2_PINYIN:
            sub_dir = pinyin.get(sub_dir, format="strip")
            # replace the dot sign in names
            sub_dir = sub_dir.replace(u'\xb7', '-').encode('utf-8')
        base_name = osp.splitext(spl[1])[0]

        save_img_subdir = osp.join(save_aligned_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        save_rect_subdir = osp.join(save_rects_dir, sub_dir)
        if not osp.exists(save_rect_subdir):
            os.mkdir(save_rect_subdir)
        # print(pts)

        save_rect_fn = osp.join(save_rect_subdir, base_name + '.txt')
        fp_rect = open(save_rect_fn, 'w')

        boxes, points = detector.detect_face(img)
        nfaces = len(boxes)
        fp_rect.write('%d\n' % nfaces)

        for i in range(nfaces):
            box = boxes[i]
            pts = points[i]
            if i:
                save_img_fn = osp.join(save_img_subdir,
                                       base_name + '_%d.jpg' % (i + 1))
            else:
                save_img_fn = osp.join(save_img_subdir, base_name + '.jpg')

            facial5points = np.reshape(pts, (2, -1))
            dst_img = warp_and_crop_face(img, facial5points, reference_5pts,
                                         output_size)
            cv2.imwrite(save_img_fn, dst_img)
            print('aligned face saved into: ', save_img_fn)

            for it in box:
                fp_rect.write('%5.2f\t' % it)
            fp_rect.write('\n')
            for i in range(5):
                fp_rect.write('%5.2f\t%5.2f\n' %
                              (facial5points[0][i], facial5points[1][i]))
        fp_rect.close()
# If you use the hog feature, there will be a short pause after you draw the
# first bounding box; that is due to the use of Numba.
detector = MtcnnDetector(model_folder='model',
                         ctx=mx.cpu(0),
                         num_worker=4,
                         accurate_landmark=False)
my_filter = KalmanFilter(dim_x=2, dim_z=1)

while thermal.isOpened():
    boundingbox = []
    avearray2, Normalizedlist, Normalizedlist2 = [], [], []
    avearray = []
    retr, rgbframe = rbg.read()
    ret, thframe = thermal.read()
    if initTracking:
        results = detector.detect_face(rgbframe)
        if results is not None:
            total_boxes = results[0]
            points = results[1]
            for p in points:
                # track the extent of the landmark points: lf/rg hold the
                # leftmost/rightmost x, top/bm the vertical extremes
                lf = rg = p[2]
                top = bm = p[7]
                for i in range(3, 5):
                    if p[i] < lf:
                        lf = p[i]
                    if p[i] > rg:
                        rg = p[i]
                    if p[i + 5] > top:
def main():
    args = get_args()
    src_dir = args.src
    if not os.path.exists(src_dir):
        raise ValueError("src dir not exist {}".format(src_dir))
    split_ratio = args.split
    dst_dir = os.path.abspath(args.dst)
    err_dir = os.path.abspath(args.err)
    num_gpus = args.ngpus
    if num_gpus == -1:
        num_gpus = len(mx.test_utils.list_gpus())
    if num_gpus == 0:
        ctx = mx.cpu(0)
    else:
        ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("src dir={} dst dir={} err_dir={} gpu={}".format(
        src_dir, dst_dir, err_dir, num_gpus))

    detector = MtcnnDetector(model_folder='model',
                             ctx=ctx,
                             num_worker=args.workers,
                             accurate_landmark=False)

    file_count = 0
    for root, dirs, files in os.walk(src_dir):
        relpath = os.path.relpath(root, src_dir)
        # dd = os.path.join(dst_dir, relpath)
        ed = os.path.join(err_dir, relpath)
        class_data_written = False  # training
        for filename in files:
            if filename.lower().endswith(('.jpg', '.jpeg', '.gif', '.png')):
                absfile = os.path.join(root, filename)
                success = False
                try:
                    # warning: cv2.imread does not handle file names with unicode characters.
                    img = cv2.imread(absfile)
                    # run detector
                    results = detector.detect_face(img)
                    if results is not None:
                        total_boxes = results[0]
                        points = results[1]
                        # keep only the biggest face
                        bigbox_idx = np.argmax([(b[2] - b[0]) * (b[3] - b[1])
                                                for b in total_boxes])
                        # extract aligned face chips
                        chips = detector.extract_image_chips(
                            img, points[bigbox_idx:bigbox_idx + 1], args.size,
                            args.padding)
                        for i, chip in enumerate(chips):
                            if split_ratio > 0:
                                if not class_data_written:
                                    ab = "train"
                                    class_data_written = True
                                    # let the validation set have the same class
                                    # labels as the training set; see the source
                                    # code of pytorch's DatasetFolder
                                    os.makedirs(os.path.join(dst_dir, "val",
                                                             relpath),
                                                exist_ok=True)
                                else:
                                    ab = "val" if random.random() > split_ratio else "train"
                                dd = os.path.join(dst_dir, ab, relpath)
                                os.makedirs(dd, exist_ok=True)
                                cv2.imwrite(
                                    os.path.join(
                                        dd,
                                        os.path.splitext(filename)[0] + ".png"),
                                    chip)
                                class_data_written = True
                            else:
                                dd = os.path.join(dst_dir, relpath)
                                os.makedirs(dd, exist_ok=True)
                                cv2.imwrite(
                                    os.path.join(
                                        dd,
                                        os.path.splitext(filename)[0] + ".png"),
                                    chip)
                        success = True
                except Exception as e:
                    print(relpath, filename, e)
                if not success:
                    os.makedirs(ed, exist_ok=True)
                    shutil.copyfile(absfile, os.path.join(ed, filename))
                file_count = file_count + 1
                if file_count % 1000 == 0:
                    print(file_count)
class MtcnnInsightface(object):
    class Face(object):
        def __init__(self, box, desc, score):
            self.box = box
            self.desc = desc
            self.score = score

    def __init__(self, detector_dir, recognize_dir, mx_epoch=0,
                 image_size=[112, 112], layer='stage4_unit3_bn3', gpu=-1):
        os.environ['GLOG_minloglevel'] = '2'
        self.det = 0
        if gpu >= 0:
            self.ctx = mx.gpu(gpu)
        else:
            self.ctx = mx.cpu()
        self.det_threshold = [0.6, 0.7, 0.8]
        self.mt_detector = MtcnnDetector(model_folder=detector_dir,
                                         ctx=self.ctx,
                                         num_worker=1,
                                         accurate_landmark=True,
                                         threshold=self.det_threshold)
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            recognize_dir, mx_epoch)
        all_layers = sym.get_internals()
        sym = all_layers[layer + '_output']
        rec_model = mx.mod.Module(symbol=sym, context=self.ctx,
                                  label_names=None)
        rec_model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                              image_size[1]))])
        rec_model.set_params(arg_params, aux_params)
        self._recognition_model = rec_model
        print("loaded detection and recognition model successfully")

    def face_detection(self, face_img):
        if isinstance(face_img, str):
            face_img = cv2.imread(face_img)
        ret = self.mt_detector.detect_face(face_img, det_type=self.det)
        return ret

    def face_recognition(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))
        input_blob = np.expand_dims(img, axis=0)
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self._recognition_model.forward(db, is_train=False)
        embedding = self._recognition_model.get_outputs()[0].asnumpy()
        embedding = sklearn.preprocessing.normalize(
            np.mean(embedding, axis=(2, 3))).flatten()
        return embedding

    def face_alignment(self, image, landmarks, desiredLeftEye=[0.35, 0.35],
                       scale=1):
        landmarks = landmarks.astype(np.float32)
        leftEye = landmarks[0]
        rightEye = landmarks[1]
        dY = rightEye[1] - leftEye[1]
        dX = rightEye[0] - leftEye[0]
        angle = np.degrees(np.arctan2(dY, dX))  # - 180
        height, width = image.shape[:2]
        desiredFaceWidth = min(width, int(abs(dX) * 4))
        desiredFaceHeight = min(
            height, 4 * int(landmarks[-1][1] - min(leftEye[1], rightEye[1])))
        eyesCenter = ((leftEye[0] + rightEye[0]) // 2,
                      (leftEye[1] + rightEye[1]) // 2)
        # grab the rotation matrix for rotating and scaling the face
        M = cv2.getRotationMatrix2D(eyesCenter, angle, scale)
        # update the translation component of the matrix
        tX = desiredFaceWidth * 0.5
        tY = desiredFaceHeight * desiredLeftEye[1]
        M[0, 2] += (tX - eyesCenter[0])
        M[1, 2] += (tY - eyesCenter[1])
        (w, h) = (desiredFaceWidth, desiredFaceHeight)
        output = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC)
        return output

    def extract_feat(self, ori_img, save_prefix=None, bodyskin_prefix=None):
        ori_img = cv2.imread(ori_img)
        # ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB)
        if bodyskin_prefix:
            if os.path.exists(bodyskin_prefix + "skin.png") and \
                    os.path.exists(bodyskin_prefix + "mask.png"):
                skin_img = cv2.imread(bodyskin_prefix + "skin.png")
                mask_img = cv2.imread(bodyskin_prefix + "mask.png")
            else:
                print("no valid bodyskin_prefix: " + bodyskin_prefix)
                return
        ret = self.face_detection(ori_img)
        all_faces, crop_bbs, bboxes = [], [], []
        if ret:
            bboxes, points = ret
            for i in range(len(bboxes)):
                box = bboxes[i]
                landmarks = points[i, :].reshape((2, 5)).T
                face_img, M = face_preprocess.preprocess(ori_img, box,
                                                         landmarks,
                                                         image_size='112,112')
                face = self.Face(box, self.face_recognition(face_img), box[-1])
                all_faces.append(face)

                # save crops
                bb = np.zeros(4, dtype=np.int32)
                margin = max(abs(box[0] - box[2]), abs(box[1] - box[3])) / 1.2
                bb[0] = np.maximum(box[0] - margin / 2, 0)
                bb[1] = np.maximum(box[1] - margin / 2, 0)
                bb[2] = np.minimum(box[2] + margin / 2, ori_img.shape[1])
                bb[3] = np.minimum(box[3] + margin / 2, ori_img.shape[0])
                face_crop = ori_img[bb[1]:bb[3], bb[0]:bb[2], :]
                # align_landmarks = landmarks
                # align_landmarks[:, 0] -= bb[0]
                # align_landmarks[:, 1] -= bb[1]
                # face_crop_aligned = self.face_alignment(face_crop, align_landmarks)
                if save_prefix:
                    cv2.imwrite(save_prefix + str(i) + '_crop.png', face_crop)
                    cv2.imwrite(save_prefix + str(i) + '_crop_aligned.png',
                                face_img)
                if bodyskin_prefix:
                    face_aligned_skin = cv2.warpAffine(skin_img, M, (112, 112),
                                                       borderValue=0.0)
                    face_aligned_mask = cv2.warpAffine(mask_img, M, (112, 112),
                                                       borderValue=0.0)
                    face_aligned_mask = cv2.cvtColor(face_aligned_mask,
                                                     cv2.COLOR_BGR2GRAY)
                    cv2.imwrite(save_prefix + str(i) + '_crop_aligned_skin.png',
                                face_aligned_skin)
                    cv2.imwrite(save_prefix + str(i) + '_crop_aligned_mask.png',
                                face_aligned_mask)
        else:
            print("no detection from mtcnn model")
        return all_faces, bboxes
def main(save_dir=None, save_img=False, show_img=False):
    minsize = 20
    caffe_model_path = MODEL_PATH
    threshold = [0.6, 0.7, OUTPUT_THRESHOLD]
    scale_factor = 0.709

    if not save_dir:
        save_dir = './fd_rlt'
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    fp_time = open(osp.join(save_dir, 'fd_time.txt'), 'w')

    t1 = time.clock()
    detector = MtcnnDetector(caffe_model_path)
    t2 = time.clock()
    msg = "initFaceDetector() costs %f seconds" % (t2 - t1)
    print(msg)
    fp_time.write(msg + '\n')

    ttl_time = 0.0
    img_cnt = 0
    for k in range(1, FOLDS_CNT + 1):
        k_str = str(k)
        if k != 10:
            k_str = "0" + k_str
        fn_list = osp.join(FDDB_FOLDS_DIR, "FDDB-fold-" + k_str + ".txt")
        fn_fd_rlt = osp.join(save_dir, "fold-" + k_str + "-out.txt")
        print('===========================')
        print('Process image list: ' + fn_list)
        print('Save results into: ' + fn_fd_rlt)

        fp_list = open(fn_list, 'r')
        fp_fd_rlt = open(fn_fd_rlt, 'w')

        for line in fp_list:
            imgname = line.strip()
            imgpath = osp.join(FDDB_IMG_ROOT_DIR, imgname + ".jpg")
            msg = "---> " + imgpath
            print(msg)
            fp_time.write(msg + '\n')

            img = cv2.imread(imgpath)
            if img is None:
                raise Exception('failed to load image: ' + imgpath)

            resize_factor = 1.0
            if DO_RESIZE:
                print('original image shape: {}'.format(img.shape))
                ht, wd, chs = img.shape
                if ht > wd:
                    resize_factor = float(RESIZED_LONG_SIDE) / ht
                else:
                    resize_factor = float(RESIZED_LONG_SIDE) / wd
                wd_new = int(resize_factor * wd)
                ht_new = int(resize_factor * ht)
                resized_img = cv2.resize(img, (wd_new, ht_new))
                print('resized image shape: {}'.format(resized_img.shape))
                # if show_img:
                #     cv2.imshow('resized_img', resized_img)
                #     ch = cv2.waitKey(0) & 0xFF
                #     if ch == 27:
                #         break
            else:
                resized_img = img
            resize_factor_inv = 1.0 / resize_factor

            img_cnt += 1
            t1 = time.clock()
            bboxes, points = detector.detect_face(resized_img, minsize,
                                                  threshold, scale_factor)
            t2 = time.clock()
            ttl_time += t2 - t1
            msg = "detect_face() costs %f seconds" % (t2 - t1)
            print(msg)
            fp_time.write(msg + '\n')

            fp_fd_rlt.write(imgname + "\n")
            fp_fd_rlt.write(str(len(bboxes)) + "\n")
            print(points)

            if DO_RESIZE:
                # map detections back to the original image scale
                for i in range(len(bboxes)):
                    for j in range(4):
                        bboxes[i][j] *= resize_factor_inv
                    for j in range(10):
                        points[i][j] *= resize_factor_inv

            for i in range(len(bboxes)):
                fp_fd_rlt.write(str(bboxes[i][0]) + " ")
                fp_fd_rlt.write(str(bboxes[i][1]) + " ")
                fp_fd_rlt.write(str(bboxes[i][2] - bboxes[i][0]) + " ")
                fp_fd_rlt.write(str(bboxes[i][3] - bboxes[i][1]) + " ")
                fp_fd_rlt.write(str(bboxes[i][4]) + "\n")
            fp_fd_rlt.flush()

            msg = "===> Processed %d images, costs %f seconds, avg time: %f seconds" % (
                img_cnt, ttl_time, ttl_time / img_cnt)
            print(msg)
            fp_time.write(msg + '\n')
            fp_time.flush()

            if save_img or show_img:
                draw_faces(img, bboxes, points)
            if save_img:
                save_name = osp.join(save_dir, osp.basename(imgpath))
                cv2.imwrite(save_name, img)
            if show_img:
                cv2.imshow('img', img)
                ch = cv2.waitKey(0) & 0xFF
                if ch == 27:
                    break

        fp_list.close()
        fp_fd_rlt.close()

    fp_time.close()
    if show_img:
        cv2.destroyAllWindows()
def main():
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    scale_factor = 0.709

    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    fp_rlt = open(osp.join(save_dir, 'rlt.txt'), 'w')

    t1 = time.clock()
    detector = MtcnnDetector(caffe_model_path)
    t2 = time.clock()
    print("initFaceDetector() costs %f seconds" % (t2 - t1))

    fp_rlt.write('\n\nNetwork path: {:s}\n\n'.format(caffe_model_path))
    msg = 'minsize={:d}, scale_factor={:.3f}, threshold=[{}, {}, {}]\n'.format(
        minsize, scale_factor, threshold[0], threshold[1], threshold[2])
    print(msg)
    fp_rlt.write(msg)

    timer = Timer()
    fp = open(img_list_file, "r")
    for line in fp:
        line = line.strip()
        if not line:
            continue
        line_splits = line.split('/')
        sub_dir = osp.join(save_dir, line_splits[0])
        if not osp.exists(sub_dir):
            os.mkdir(sub_dir)
        base_fn = osp.splitext(line_splits[1])[0]
        fn_det = osp.join(sub_dir, base_fn + '.txt')
        fp_det = open(fn_det, 'w')
        fp_det.write(line + '\n')

        im_file = osp.join(img_root_dir, line)
        msg = '===> ' + im_file
        print(msg)
        fp_rlt.write(msg + '\n')

        img = cv2.imread(im_file)
        msg = 'size (W x H): {:d} x {:d}'.format(img.shape[1], img.shape[0])
        print(msg)
        fp_rlt.write(msg + '\n')

        # Detect all faces and regress their bounding boxes
        timer.tic()
        bboxes, points = detector.detect_face(img, minsize, threshold,
                                              scale_factor)
        timer.toc()
        msg = 'Detection took {:.3f}s\n'.format(timer.diff)
        print(msg)
        fp_rlt.write(msg)

        n_dets = len(bboxes)
        fp_det.write('{:d}\n'.format(n_dets))
        for i in range(n_dets):
            msg = '{:f} {:f} {:f} {:f} {:f}\n'.format(
                bboxes[i][0], bboxes[i][1], bboxes[i][2] - bboxes[i][0],
                bboxes[i][3] - bboxes[i][1], bboxes[i][4])
            fp_det.write(msg)
        fp_det.close()

    msg = ('===> Processed {:d} images took {:.3f}s, '
           'Avg time: {:.3f}s\n').format(timer.calls, timer.total_time,
                                         timer.total_time / timer.calls)
    print(msg)
    fp_rlt.write(msg)
    fp_rlt.flush()
    fp_rlt.close()
    fp.close()
save_model_prefix = "cpt/smpnet"
epoch = 730
np_x = np.array([range(128)] * 128) * 2.0
np_pos = np.stack([np.stack([np_x, np_x.transpose(1, 0)], axis=0)] * 1, axis=0)

detector = MtcnnDetector(model_folder='../model',
                         ctx=mx.gpu(0),
                         num_worker=1,
                         accurate_landmark=True)
model = mx.model.FeedForward.load(save_model_prefix, epoch, ctx=mx.gpu())

camera = cv2.VideoCapture(0)
while True:
    grab, frame = camera.read()
    rs = detector.detect_face(frame)
    if rs is not None:
        bb = rs[0][0]
        pts = rs[1][0]
        nose_x = pts[2]
        nose_y = pts[7]
        # radius from the nose tip to the farthest bounding-box edge
        r_left = abs(nose_x - bb[0])
        r_right = abs(nose_x - bb[2])
        r_top = abs(nose_y - bb[1])
        r_bottom = abs(nose_y - bb[3])
        r = max(r_left, r_right, r_top, r_bottom)
        R = 1.2 * r
        left = int(nose_x - R)
        right = int(nose_x + R)
        top = int(nose_y - R)
        bottom = int(nose_y + R)
recog_classifier = Resnet20(args.model_path, args.ref_path, args.model_type)
det_cnt = 0
recog_cnt = 0
error = 0
rectangle_color = (0, 255, 0)

if args.save_video:
    out = cv2.VideoWriter(args.save_path, cv2.VideoWriter_fourcc(*'XVID'), 10,
                          (frame_width, frame_height))

while True:
    ret, frame = video_capture.read()
    if ret:
        if args.detector == "MTCNN":
            det_results = detector.detect_face(frame)
            if det_results is None:
                continue
            else:
                bboxes, points = det_results
        elif args.detector == "Retinaface":
            scales = [0.5, 0.5]
            bboxes, points = detector.detect(frame, face_threshold,
                                             scales=scales, do_flip=False)
        if bboxes is not None:
            if bboxes.shape[0] == 0:
                continue
class VideoDetector(object):
    def __init__(self, arguments, mx_context):
        self.args = arguments
        self.ctx = mx_context
        self.model = face_model.FaceModel(args)
        self.detector = MtcnnDetector(model_folder='mtcnn-model/',
                                      ctx=self.ctx,
                                      num_worker=4,
                                      accurate_landmark=False)
        self.names = None  # Names of the persons in the dataset
        self.dataset = None  # Collection of features of known names

    def prepare_faces(self, dataset_name='dataset.pkl'):
        image_names = os.listdir(self.args.faces_dir)
        face_names = set([x.split('_')[0] for x in image_names])

        dataset = {}
        for name in face_names:
            images = [
                cv2.imread(os.path.join(self.args.faces_dir, iname))
                for iname in image_names if name in iname
            ]
            features = [
                self.model.get_feature(self.model.get_input(img))
                for img in images
            ]
            features = np.stack(features)
            dataset[name] = features

        dataset_path = os.path.abspath(os.path.join(self.args.faces_dir, '..'))
        with open(dataset_path + '/' + dataset_name, 'wb') as f:
            pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)

    def detect(self):
        if self.dataset is None:
            self.load_features()
        cap = cv2.VideoCapture(args.in_file)  # Create a VideoCapture object
        # Convert resolutions from float to integer.
        frame_w, frame_h = int(cap.get(3)), int(cap.get(4))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        renders = []
        frame_time = np.array([])
        for _ in tqdm(range(total_frames)):
            start = time()
            ret, frame = cap.read()
            if ret:
                render = self.detect_faces(frame)
                renders.append(render)
            frame_time = np.append(frame_time, time() - start)
        cap.release()
        return renders, {
            'w': frame_w,
            'h': frame_h
        }, {
            'fr_exec': frame_time.mean()
        }

    def load_features(self, dataset_name='dataset.pkl'):
        dataset_path = os.path.abspath(os.path.join(self.args.faces_dir, '..'))
        with open(dataset_path + '/' + dataset_name, 'rb') as f:
            # Load dataset in numpy format
            np_dataset = pickle.load(f)
        # Create a dictionary with person names and their corresponding feature index
        self.names = {}
        i = 0
        for k, v in np_dataset.items():
            self.names[k] = slice(i, i + v.shape[0])
            i += v.shape[0]
        # Transform dataset to mx NDArray format
        self.dataset = nd.array(np.concatenate(
            [v for v in np_dataset.values()]), ctx=self.ctx)

    def draw_names(self, frame, names):
        # names: dict{'name': bounding_box}
        colors = box_colors[:len(names)]
        for name, b, c in zip(names.keys(), names.values(), colors):
            if name == 'unknown':
                for x in b:
                    cv2.rectangle(frame, (int(x[0]), int(x[1])),
                                  (int(x[2]), int(x[3])), colors[-1], 2)
                    # cv2.putText(frame, 'unknown', (int(b[0]), int(b[1])),
                    #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                    #             (255, 255, 255), 2, cv2.LINE_AA)
            else:
                cv2.rectangle(frame, (int(b[0]), int(b[1])),
                              (int(b[2]), int(b[3])), c, 2)
                cv2.putText(frame, name, (int(b[0]), int(b[1])),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3,
                            cv2.LINE_AA)
        return frame

    def name_faces(self, persons, total_boxes):
        faces_names = {}
        unknown_faces = []
        for person, box in zip(persons, total_boxes):
            face = self.model.get_input(person)
            if face is None:
                continue
            face = nd.array(self.model.get_feature(face), ctx=self.ctx)
            # Calculate the similarity between the known features and the current face feature
            sim = nd.dot(self.dataset, face)
            scores = {}
            for known_id, index in self.names.items():
                scores[known_id] = max(sim[index]).asnumpy()
            if max(scores.values()) > self.args.threshold_face:
                faces_names[max(scores, key=scores.get)] = box
            else:
                unknown_faces.append(box)
        if len(unknown_faces):
            faces_names['unknown'] = unknown_faces
        return faces_names

    def detect_faces(self, frame):
        resolution = int(self.args.image_size.split(',')[0])
        # run detector
        results = self.detector.detect_face(frame)
        if results is not None:
            total_boxes = results[0]
            points = results[1]
            # extract aligned face chips
            persons = self.detector.extract_image_chips(frame, points,
                                                        resolution, 0.37)
            if self.args.recognize:
                faces_names = self.name_faces(persons, total_boxes)
            else:
                faces_names = {'unknown': [box for box in total_boxes]}
            return self.draw_names(frame, faces_names)
        else:
            return frame
from mtcnn_detector import MtcnnDetector
import os
import cv2
import mxnet as mx

mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
ctx = mx.gpu(0)
det_threshold = [0.6, 0.7, 0.8]
# detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1,
#                          accurate_landmark=True, threshold=det_threshold)
# detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1,
#                          accurate_landmark=True, threshold=[0.0, 0.0, 0.2])
detector = MtcnnDetector()

img_path = "Tom_Hanks_54745.png"
img_path = "a.jpg"
img = cv2.imread(img_path)
# print(img)

# det_type=1 assumes the input is already a face crop and runs landmark
# detection only
ret = detector.detect_face(img, det_type=1)
bbox, landmark = ret
# print(bbox)
pointx = landmark[0][:5]
pointy = landmark[0][5:]
for x, y in zip(pointx, pointy):
    cv2.circle(img, (int(x), int(y)), 1, (0, 0, 255), -1)

cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# # cv2.imshow("detection result", draw)
# cv2.waitKey(0)

# --------------
# test on camera
# --------------
camera = cv2.VideoCapture(0)
while True:
    grab, frame = camera.read()
    img = cv2.resize(frame, (320, 180))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    t1 = time.time()
    results = detector.detect_face(img)
    print(f'shape: {img.shape}, time: {time.time() - t1}')

    if results is None:
        continue

    total_boxes = results[0]
    points = results[1]

    draw = img.copy()
    for b in total_boxes:
        cv2.rectangle(draw, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (255, 255, 255))
    for p in points:
        for i in range(5):
def main():
    args = get_args()
    src_file = args.src
    if not os.path.exists(src_file):
        raise ValueError("src file not exist {}".format(src_file))
    split_ratio = args.split
    dst_dir = os.path.abspath(args.dst)
    num_gpus = args.ngpus
    if num_gpus == -1:
        num_gpus = len(mx.test_utils.list_gpus())
    if num_gpus == 0:
        ctx = mx.cpu(0)
    else:
        ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("src={} dst dir={} gpu={}".format(src_file, dst_dir, num_gpus))

    s = read_clean_list(args.cleanlist)
    detector = MtcnnDetector(model_folder='model',
                             ctx=ctx,
                             num_worker=args.workers,
                             accurate_landmark=True)

    file_count = 0
    with open(src_file, "r", encoding="utf-8") as f:
        last_m_id = "x"
        for line in f:
            m_id, image_search_rank, image_url, page_url, face_id, \
                face_rectangle, face_data = line.split("\t")
            # rect = struct.unpack("ffff", base64.b64decode(face_rectangle))
            if "{}/{}".format(m_id, image_search_rank) in s:
                data = np.frombuffer(base64.b64decode(face_data),
                                     dtype=np.uint8)
                img = cv2.imdecode(data, cv2.IMREAD_COLOR)
                h, w, _ = img.shape
                if h > 128 and w > 128:
                    try:
                        # run detector
                        results = detector.detect_face(img)
                        if results is not None:
                            total_boxes = results[0]
                            points = results[1]
                            bigbox_idx = np.argmax([(b[2] - b[0]) * (b[3] - b[1])
                                                    for b in total_boxes])
                            # extract aligned face chips
                            chips = detector.extract_image_chips(
                                img, points[bigbox_idx:bigbox_idx + 1],
                                args.size, args.padding)
                            for i, chip in enumerate(chips):
                                if last_m_id != m_id:
                                    # let the validation set have the same class
                                    # labels as the training set; see the source
                                    # code of pytorch's DatasetFolder
                                    ab = "train"
                                else:
                                    ab = "val" if random.random() > split_ratio else "train"
                                dd = os.path.join(dst_dir, ab, m_id)
                                os.makedirs(dd, exist_ok=True)
                                cv2.imwrite(
                                    os.path.join(
                                        dd, "{}.png".format(image_search_rank)),
                                    chip)
                                last_m_id = m_id
                    except Exception as e:
                        print(m_id, image_search_rank, e)
            file_count = file_count + 1
            if file_count % 1000 == 0:
                print(file_count)