def main():
    global colours, img_size
    args = parse_args()
    videos_dir = args.videos_dir
    output_path = args.output_path
    no_display = args.no_display
    detect_interval = args.detect_interval  # keep a balance between performance and smoothness here
    margin = args.margin  # if faces are large in your video, a bigger margin makes tracking easier
    scale_rate = args.scale_rate  # a smaller value shrinks the input frames
    show_rate = args.show_rate  # a smaller value displays smaller frames
    face_score_threshold = args.face_score_threshold

    mkdir(output_path)
    # for display
    if not no_display:
        colours = np.random.rand(32, 3)

    # init tracker
    tracker = Sort()  # create instance of the SORT tracker

    logger.info('Start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=False)) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            minsize = 40  # minimum face size for MTCNN to detect
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
            factor = 0.709  # scale factor

            for filename in os.listdir(videos_dir):
                logger.info('All files: {}'.format(filename))
            for filename in os.listdir(videos_dir):
                suffix = filename.split('.')[1]
                if suffix != 'mp4' and suffix != 'avi':  # add more video formats here if needed
                    continue
                video_name = os.path.join(videos_dir, filename)
                directoryname = os.path.join(output_path, filename.split('.')[0])
                logger.info('Video_name: {}'.format(video_name))
                # cam = cv2.VideoCapture(video_name)
                cam = cv2.VideoCapture(0)

                # load the Haar face detector and the emotion classifier once per video
                face_detection = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
                emotion_classifier = load_model('models/_mini_XCEPTION.106-0.65.hdf5', compile=False)
                EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised", "neutral"]
                emotion = 'Happy'
                label = emotion  # default emotion label until the classifier has run

                c = 0
                while True:
                    final_faces = []
                    addtional_attribute_list = []
                    ret, frame = cam.read()
                    if not ret:
                        logger.warning("ret false")
                        break
                    if frame is None:
                        logger.warning("frame drop")
                        break

                    frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    if c % detect_interval == 0:
                        img_size = np.asarray(frame.shape)[0:2]
                        mtcnn_starttime = time()
                        faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                                threshold, factor)
                        logger.info("MTCNN detect face cost time : {} s".format(
                            round(time() - mtcnn_starttime, 3)))  # MTCNN detection is slow
                        face_sums = faces.shape[0]
                        if face_sums > 0:
                            face_list = []
                            for i, item in enumerate(faces):
                                score = round(faces[i, 4], 6)
                                if score > face_score_threshold:
                                    det = np.squeeze(faces[i, 0:4])

                                    # face rectangle
                                    det[0] = np.maximum(det[0] - margin, 0)
                                    det[1] = np.maximum(det[1] - margin, 0)
                                    det[2] = np.minimum(det[2] + margin, img_size[1])
                                    det[3] = np.minimum(det[3] + margin, img_size[0])
                                    face_list.append(item)

                                    # face cropped
                                    bb = np.array(det, dtype=np.int32)

                                    # use 5 face landmarks to judge whether the face is frontal or a side face
                                    squeeze_points = np.squeeze(points[:, i])
                                    tolist = squeeze_points.tolist()
                                    facial_landmarks = []
                                    for j in range(5):
                                        item = [tolist[j], tolist[(j + 5)]]
                                        facial_landmarks.append(item)
                                    if args.face_landmarks:
                                        for (x, y) in facial_landmarks:
                                            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
                                    cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :].copy()

                                    dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                        np.array(facial_landmarks))

                                    # face additional attributes (index 0: face score; index 1: 0 for frontal face, 1 for side face)
                                    item_list = [cropped, score, dist_rate, high_ratio_variance, width_rate]
                                    addtional_attribute_list.append(item_list)

                            final_faces = np.array(face_list)

                    # classify the emotion of the largest frontal face found by the Haar detector
                    frontal_faces = face_detection.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                                                    minSize=(30, 30),
                                                                    flags=cv2.CASCADE_SCALE_IMAGE)
                    if len(frontal_faces) > 0:
                        frontal_faces = sorted(frontal_faces, reverse=True,
                                               key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
                        (fX, fY, fW, fH) = frontal_faces
                        roi = gray[fY:fY + fH, fX:fX + fW]
                        roi = cv2.resize(roi, (48, 48))
                        roi = roi.astype("float") / 255.0
                        roi = img_to_array(roi)
                        roi = np.expand_dims(roi, axis=0)
                        preds = emotion_classifier.predict(roi)[0]
                        emotion_probability = np.max(preds)
                        label = EMOTIONS[preds.argmax()]

                    trackers = tracker.update(final_faces, img_size, directoryname,
                                              addtional_attribute_list, detect_interval)
                    c += 1
                    emoTracker = ''
                    for d in trackers:
                        if not no_display:
                            d = d.astype(np.int32)
                            cv2.rectangle(frame, (d[0], d[1]), (d[2], d[3]), colours[d[4] % 32, :] * 255, 3)
                            if len(final_faces) != 0:
                                if label != emoTracker:
                                    emoTracker = label
                                cv2.putText(frame, 'ID : %d DETECT, EMOTION : %s' % (d[4], emoTracker),
                                            (d[0] - 10, d[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                                            colours[d[4] % 32, :] * 255, 2)
                            else:
                                cv2.putText(frame, 'ID : %d' % (d[4]), (d[0] - 10, d[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)

                    if not no_display:
                        frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
                        cv2.imshow("Frame", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
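# --------------------------------------------------------------------------
# NOTE: parse_args() is referenced by every variant above but is not shown in
# these snippets. The sketch below is an assumption, not the project's actual
# implementation: it only demonstrates an argparse setup that would provide
# every attribute the functions read from `args`. The default values are
# illustrative, and the variant further down that calls parse_args(video_folder)
# with an argument is not covered by this no-argument form.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Face track / detect / extract')
    parser.add_argument('--videos_dir', type=str, default='videos',
                        help='directory containing the input videos')
    parser.add_argument('--output_path', type=str, default='facepics',
                        help='directory where cropped faces are saved')
    parser.add_argument('--no_display', action='store_true',
                        help='disable the OpenCV display window')
    parser.add_argument('--detect_interval', type=int, default=1,
                        help='run MTCNN every N frames (speed vs. smoothness trade-off)')
    parser.add_argument('--margin', type=int, default=10,
                        help='margin added around each detected face box')
    parser.add_argument('--scale_rate', type=float, default=0.9,
                        help='resize factor applied to input frames')
    parser.add_argument('--show_rate', type=float, default=0.8,
                        help='resize factor applied to displayed frames')
    parser.add_argument('--face_score_threshold', type=float, default=0.85,
                        help='minimum MTCNN score for a detection to be kept')
    parser.add_argument('--face_landmarks', action='store_true',
                        help='draw the five facial landmarks on the frame')
    return parser.parse_args()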
def detectExtract(a):
    global colours, img_size
    args = parse_args()
    videos_dir = args.videos_dir
    output_path = args.output_path
    no_display = args.no_display
    detect_interval = args.detect_interval  # keep a balance between performance and smoothness here
    margin = args.margin  # if faces are large in your video, a bigger margin makes tracking easier
    scale_rate = args.scale_rate  # a smaller value shrinks the input frames
    show_rate = args.show_rate  # a smaller value displays smaller frames
    face_score_threshold = args.face_score_threshold

    mkdir(output_path)
    # for display
    if not no_display:
        colours = np.random.rand(32, 3)

    # init tracker
    tracker = Sort()  # create instance of the SORT tracker

    logger.info('Start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=True)) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            minsize = 80  # minimum face size for MTCNN to detect
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
            factor = 0.709  # scale factor

            for filename in os.listdir(videos_dir):
                logger.info('All files: {}'.format(filename))
            for filename in os.listdir(videos_dir):
                suffix = filename.split('.')[1]
                if suffix != 'mp4' and suffix != 'avi':  # add more video formats here if needed
                    continue
                video_name = os.path.join(videos_dir, filename)
                directoryname = os.path.join(output_path, filename.split('.')[0])
                logger.info('Video_name: {}'.format(video_name))
                cam = cv2.VideoCapture(video_name)
                c = 0
                while True:
                    final_faces = []
                    addtional_attribute_list = []
                    ret, frame = cam.read()
                    if not ret:
                        logger.warning("ret false")
                        break
                    if frame is None:
                        logger.warning("frame drop")
                        break

                    # frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                    r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    if c % detect_interval == 0:
                        img_size = np.asarray(frame.shape)[0:2]
                        mtcnn_starttime = time()
                        faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                                threshold, factor)
                        logger.info("MTCNN detect face cost time : {} s".format(
                            round(time() - mtcnn_starttime, 3)))  # MTCNN detection is slow
                        face_sums = faces.shape[0]
                        if face_sums > 0:
                            face_list = []
                            for i, item in enumerate(faces):
                                score = round(faces[i, 4], 6)
                                if score > face_score_threshold:
                                    det = np.squeeze(faces[i, 0:4])

                                    # face rectangle
                                    det[0] = np.maximum(det[0] - margin, 0)
                                    det[1] = np.maximum(det[1] - margin, 0)
                                    det[2] = np.minimum(det[2] + margin, img_size[1])
                                    det[3] = np.minimum(det[3] + margin, img_size[0])
                                    face_list.append(item)

                                    # face cropped
                                    bb = np.array(det, dtype=np.int32)

                                    # use 5 face landmarks to judge whether the face is frontal or a side face
                                    squeeze_points = np.squeeze(points[:, i])
                                    tolist = squeeze_points.tolist()
                                    facial_landmarks = []
                                    for j in range(5):
                                        item = [tolist[j], tolist[(j + 5)]]
                                        facial_landmarks.append(item)
                                    if args.face_landmarks:
                                        for (x, y) in facial_landmarks:
                                            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
                                    cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :].copy()

                                    dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                        np.array(facial_landmarks))

                                    # face additional attributes (index 0: face score; index 1: 0 for frontal face, 1 for side face)
                                    item_list = [cropped, score, dist_rate, high_ratio_variance, width_rate]
                                    addtional_attribute_list.append(item_list)

                            final_faces = np.array(face_list)

                    trackers = tracker.update(final_faces, img_size, directoryname,
                                              addtional_attribute_list, detect_interval)
                    c += 1
                    for d in trackers:
                        if not no_display:
                            d = d.astype(np.int32)
                            cv2.rectangle(frame, (d[0], d[1]), (d[2], d[3]), colours[d[4] % 32, :] * 255, 3)
                            if len(final_faces) != 0:
                                cv2.putText(frame, 'ID : %d DETECT' % (d[4]), (d[0] - 10, d[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)
                                cv2.putText(frame, 'DETECTOR', (5, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (1, 1, 1), 2)
                            else:
                                cv2.putText(frame, 'ID : %d' % (d[4]), (d[0] - 10, d[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)

                    if not no_display:
                        frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
                        cv2.imshow("Frame", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
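# --------------------------------------------------------------------------
# NOTE: the functions above also rely on a few module-level helpers that are
# not shown in these snippets (mkdir, logger, project_dir). The definitions
# below are plausible stand-ins, sketched only as assumptions so the code is
# easier to read in isolation; the real project may define them differently.
import logging
import os

# assumed: the directory of this script, which contains the "align" MTCNN models
project_dir = os.path.dirname(os.path.abspath(__file__))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def mkdir(path):
    """Create the output directory if it does not exist yet."""
    if not os.path.exists(path):
        os.makedirs(path)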
def main():
    global colours, img_size
    args = parse_args()
    # videos_dir = args.videos_dir
    output_path = args.output_path
    no_display = args.no_display
    detect_interval = args.detect_interval  # keep a balance between performance and smoothness here
    margin = args.margin  # if faces are large in your video, a bigger margin makes tracking easier
    scale_rate = args.scale_rate  # a smaller value shrinks the input frames
    show_rate = args.show_rate  # a smaller value displays smaller frames
    face_score_threshold = args.face_score_threshold

    mkdir(output_path)
    # for display
    if not no_display:
        colours = np.random.rand(32, 3)

    # init tracker
    tracker = Sort()  # create instance of the SORT tracker

    logger.info('Start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=False)) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            minsize = 40  # minimum face size for MTCNN to detect
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
            factor = 0.709  # scale factor

            # the per-video file loop from the original script is disabled in this variant:
            # faces are saved straight under output_path and frames come from the local camera
            directoryname = output_path
            cam = cv2.VideoCapture(0)  # video_name --> 0, read from the local camera
            c = 0
            while True:
                final_faces = []
                addtional_attribute_list = []
                ret, frame = cam.read()
                if not ret:
                    logger.warning("ret false")
                    break
                if frame is None:
                    logger.warning("frame drop")
                    break

                frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                if c % detect_interval == 0:
                    img_size = np.asarray(frame.shape)[0:2]
                    mtcnn_starttime = time()
                    faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                            threshold, factor)
                    logger.info("MTCNN detect face cost time : {} s".format(
                        round(time() - mtcnn_starttime, 3)))  # MTCNN detection is slow
                    face_sums = faces.shape[0]
                    if face_sums > 0:
                        face_list = []
                        for i, item in enumerate(faces):
                            score = round(faces[i, 4], 2)
                            if score > face_score_threshold:
                                det = np.squeeze(faces[i, 0:4])

                                # face rectangle
                                det[0] = np.maximum(det[0] - margin, 0)
                                det[1] = np.maximum(det[1] - margin, 0)
                                det[2] = np.minimum(det[2] + margin, img_size[1])
                                det[3] = np.minimum(det[3] + margin, img_size[0])
                                face_list.append(item)

                                # face cropped
                                bb = np.array(det, dtype=np.int32)

                                # use 5 face landmarks to judge whether the face is frontal or a side face
                                squeeze_points = np.squeeze(points[:, i])
                                tolist = squeeze_points.tolist()
                                facial_landmarks = []
                                for j in range(5):
                                    item = [tolist[j], tolist[(j + 5)]]
                                    facial_landmarks.append(item)
                                if args.face_landmarks:
                                    for (x, y) in facial_landmarks:
                                        cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
                                cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :].copy()

                                # recognise the cropped face and draw the predicted name above the box
                                cropped_face = cv2.resize(cropped, (160, 160), interpolation=cv2.INTER_AREA)
                                feature = face_recognition.recognize(cropped_face)
                                (name, proba, conf) = face_classfier.classify(feature)
                                # cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
                                name = name + ":%" + str(proba)
                                # plot result idx under box
                                text_x = bb[0] - 10
                                text_y = bb[1] - 10
                                if conf:
                                    cv2.putText(frame, name, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                                1, (0, 0, 255), thickness=1, lineType=2)

                                dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                    np.array(facial_landmarks))

                                # face additional attributes (index 0: face score; index 1: 0 for frontal face, 1 for side face)
                                item_list = [cropped, score, dist_rate, high_ratio_variance, width_rate]
                                addtional_attribute_list.append(item_list)

                        final_faces = np.array(face_list)

                trackers = tracker.update(final_faces, img_size, directoryname,
                                          addtional_attribute_list, detect_interval)
                c += 1
                for d in trackers:
                    if not no_display:
                        d = d.astype(np.int32)
                        cv2.rectangle(frame, (d[0], d[1]), (d[2], d[3]), colours[d[4] % 32, :] * 255, 3)
                        if len(final_faces) != 0:
                            # cv2.putText(frame, 'ID : %d DETECT' % (d[4]), (d[0] - 10, d[1] - 10),
                            #             cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)
                            cv2.putText(frame, 'DETECTOR', (5, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (1, 1, 1), 2)
                        else:
                            cv2.putText(frame, 'ID : %d' % (d[4]), (d[0] - 10, d[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)

                if not no_display:
                    frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
                    cv2.imshow("Frame", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            cam.release()
            cv2.destroyAllWindows()


# modify the default parameters of np.load
np.load.__defaults__ = (None, False, True, 'ASCII')
for box in decision_boxes:
    a_bbox = []
    for ii in range(0, 4):
        a_bbox.append(box['bbox']['points'][ii])
    a_bbox.append(0.99)
    a_bbox.append(box['bbox']['score'])
    if box['bbox']['label'] in vehicle:
        a_bbox.append(float(vehicle.index(box['bbox']['label'])))
    else:
        raise ValueError('Vehicle TYPE ERROR')
    sort_box.append(a_bbox)

sort_box = torch.tensor(sort_box).cuda()
tracked_objects = mot_tracker.update(sort_box.cpu())
for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
    cls = vehicle[int(cls_pred)]
    # img_result = cv2.UMat(img_result)
    # cv2.putText(img_result, cls + ":" + str(int(obj_id)), (int(x1 + 30), int(y1 - 10)),
    #             cv2.FONT_HERSHEY_SIMPLEX, 1, (225, 225, 0), 3)
############################ SORT ######################################

# save img_result
# detect violations
if violation_frame == -1:
    for i in range(len(decision_boxes)):
        if decision_boxes[i]['decision'] != 'pass':
            # create a folder for this violation
            directory = './violation/' + str(c)
            createFolder(directory)
def main(called_from, stream_name, vid_strt_time, video_folder):
    global colours, img_size
    args = parse_args(video_folder)
    videos_dir = args.videos_dir
    output_path = args.output_path
    no_display = args.no_display
    detect_interval = args.detect_interval  # keep a balance between performance and smoothness here
    margin = args.margin  # if faces are large in your video, a bigger margin makes tracking easier
    scale_rate = args.scale_rate  # a smaller value shrinks the input frames
    show_rate = args.show_rate  # a smaller value displays smaller frames
    face_score_threshold = args.face_score_threshold

    mkdir(output_path)
    # for display
    if not no_display:
        colours = np.random.rand(100, 3)

    # init tracker
    tracker = Sort()  # create instance of the SORT tracker

    logger.info('Start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=False)) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            minsize = 70  # minimum face size for MTCNN to detect
            threshold = [0.6, 0.7, 0.7]  # thresholds for the three MTCNN stages
            factor = 0.709  # scale factor

            for filename in os.listdir(videos_dir):
                logger.info('All files: {}'.format(filename))
            for filename in os.listdir(videos_dir):
                suffix = filename.split('.')[1]
                if suffix != 'mp4' and suffix != 'avi' and suffix != 'flv':  # add more video formats here if needed
                    continue
                video_name = os.path.join(videos_dir, filename)
                directoryname = os.path.join(output_path, filename.split('.')[0])
                logger.info('Video_name: {}'.format(video_name))
                cam = cv2.VideoCapture(video_name)
                c = 0
                while True:
                    final_faces = []
                    addtional_attribute_list = []
                    ret, frame = cam.read()
                    start_time.append(time.time())
                    if not ret:
                        logger.warning("ret false")
                        break
                    if frame is None:
                        logger.warning("frame drop")
                        break

                    frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                    r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    if c % detect_interval == 0:
                        img_size = np.asarray(frame.shape)[0:2]
                        mtcnn_starttime = time.time()
                        faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                                threshold, factor)
                        logger.info("MTCNN detect face cost time : {} s".format(
                            round(time.time() - mtcnn_starttime, 3)))  # MTCNN detection is slow
                        face_sums = faces.shape[0]
                        if face_sums > 0:
                            face_list = []
                            for i, item in enumerate(faces):
                                score = round(faces[i, 4], 6)
                                if score > face_score_threshold:
                                    det = np.squeeze(faces[i, 0:4])

                                    # face rectangle
                                    det[0] = np.maximum(det[0] - margin, 0)
                                    det[1] = np.maximum(det[1] - margin, 0)
                                    det[2] = np.minimum(det[2] + margin, img_size[1])
                                    det[3] = np.minimum(det[3] + margin, img_size[0])
                                    face_list.append(item)

                                    # face cropped
                                    bb = np.array(det, dtype=np.int32)

                                    # use 5 face landmarks to judge whether the face is frontal or a side face
                                    squeeze_points = np.squeeze(points[:, i])
                                    tolist = squeeze_points.tolist()
                                    facial_landmarks = []
                                    for j in range(5):
                                        item = [tolist[j], tolist[(j + 5)]]
                                        facial_landmarks.append(item)
                                    if args.face_landmarks:
                                        for (x, y) in facial_landmarks:
                                            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
                                    cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :].copy()

                                    dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                        np.array(facial_landmarks))

                                    # face additional attributes (index 0: face score; index 1: 0 for frontal face, 1 for side face)
                                    item_list = [cropped, score, dist_rate, high_ratio_variance, width_rate]
                                    addtional_attribute_list.append(item_list)

                            final_faces = np.array(face_list)

                    trackers = tracker.update(final_faces, img_size, directoryname,
                                              addtional_attribute_list, detect_interval)
                    c += 1
                    for d in trackers:
                        d = d.astype(np.int32)
                        g.append(str(start_time[4]))  # g and start_time are module-level lists kept by the caller
                        if not no_display:
                            d = d.astype(np.int32)
                        if len(final_faces) != 0:
                            try:
                                os.mkdir('DB/' + called_from + str(d[4]))
                                first_time = round(start_time[0])
                                face_time = round(time.time())
                                entryTime = face_time - first_time
                                dr = parser.parse(vid_strt_time)
                                a = dr + timedelta(seconds=entryTime)
                                real_enter_time = a.strftime("%H:%M:%S")
                                f = open("DB/entryTime.txt", "a")
                                f.write(called_from + str(d[4]) + ',' + real_enter_time + "\n")
                                f.close()
                                f = open("DB/stream_of_folder.txt", "a")
                                f.write(stream_name + ',' + called_from + str(d[4]) + "\n")
                                f.close()
                                profile_id = called_from + str(d[4])
                            except Exception:
                                print("The folder already exists!!")
                            image_path = "DB/" + called_from + str(d[4]) + '/' + str(random.randint(32, 12141212)) + ".jpg"
                            save(frame, d, image_path)
                            original_height = (d[3] - d[1])

                    if not no_display:
                        frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
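# --------------------------------------------------------------------------
# NOTE: `save(frame, d, image_path)` used above is not defined in these
# snippets. The sketch below is an assumption consistent with how it is
# called (full frame, tracked box [x1, y1, x2, y2, id], destination path):
# it clips the box to the frame and writes the crop as a JPEG.
def save(frame, d, image_path):
    h, w = frame.shape[:2]
    x1, y1 = max(int(d[0]), 0), max(int(d[1]), 0)
    x2, y2 = min(int(d[2]), w), min(int(d[3]), h)
    if x2 > x1 and y2 > y1:
        cv2.imwrite(image_path, frame[y1:y2, x1:x2])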
def main():
    global colours, img_size
    args = parse_args()
    # directory holding one or more videos
    root_dir = args.root_dir
    # directory where collected and cropped faces are saved
    output_path = args.output_path
    display = args.display
    mkdir(output_path)
    if display:
        colours = np.random.rand(32, 3)

    # initialise the tracker
    tracker = Sort()

    logger.info('start track and extract......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=False)) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, "align")
            margin = 50
            minsize = 60
            threshold = [0.6, 0.7, 0.7]
            factor = 0.709
            frame_interval = 1  # run detection every N frames (default 3)
            scale_rate = 1  # resize factor for the input frames
            show_rate = 1  # resize factor for the displayed frames

            for filename in os.listdir(root_dir):
                logger.info('all files:{}'.format(filename))
            # iterate over all videos in mp4 format
            for filename in os.listdir(root_dir):
                if filename.split('.')[1] != 'mp4':
                    continue
                video_name = os.path.join(root_dir, filename)
                directoryname = os.path.join(output_path, filename.split('.')[0])
                logger.info('video_name:{}'.format(video_name))
                cam = cv2.VideoCapture(video_name)
                c = 0
                while True:
                    final_faces = []
                    addtional_attribute_list = []
                    ret, frame = cam.read()
                    if not ret:
                        logger.warning("ret false")
                        break
                    if frame is None:
                        logger.warning("frame drop")
                        break

                    frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                    r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    if c % frame_interval == 0:
                        img_size = np.asarray(frame.shape)[0:2]
                        faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                                threshold, factor)
                        face_sums = faces.shape[0]
                        if face_sums > 0:
                            face_list = []
                            for i, item in enumerate(faces):
                                # confidence of the detected face
                                f = round(faces[i, 4], 6)
                                if f > 0.95:
                                    det = np.squeeze(faces[i, 0:4])

                                    # face rectangle
                                    det[0] = np.maximum(det[0] - margin, 0)
                                    det[1] = np.maximum(det[1] - margin, 0)
                                    det[2] = np.minimum(det[2] + margin, img_size[1])
                                    det[3] = np.minimum(det[3] + margin, img_size[0])
                                    face_list.append(item)

                                    # face cropped
                                    bb = np.array(det, dtype=np.int32)
                                    frame_copy = frame.copy()
                                    cropped = frame_copy[bb[1]:bb[3], bb[0]:bb[2], :]

                                    # use 5 face landmarks to judge whether the face is frontal or a side face
                                    squeeze_points = np.squeeze(points[:, i])
                                    tolist = squeeze_points.tolist()
                                    facial_landmarks = []
                                    for j in range(5):
                                        item = [tolist[j], tolist[(j + 5)]]
                                        facial_landmarks.append(item)
                                    # visualise the landmarks
                                    if args.face_landmarks:
                                        for (x, y) in facial_landmarks:
                                            cv2.circle(frame_copy, (int(x), int(y)), 3, (0, 0, 255), 2)

                                    # compute three values used to judge whether this is a frontal face
                                    dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                        np.array(facial_landmarks))

                                    # face additional attributes (index 0: face score; index 1: 0 for frontal face, 1 for side face)
                                    item_list = [cropped, faces[i, 4], dist_rate, high_ratio_variance, width_rate]
                                    addtional_attribute_list.append(item_list)

                            final_faces = np.array(face_list)

                    trackers = tracker.update(final_faces, img_size, directoryname,
                                              addtional_attribute_list, r_g_b_frame)
                    c += 1
                    for d in trackers:
                        if display:
                            d = d.astype(np.int32)
                            cv2.rectangle(frame, (d[0], d[1]), (d[2], d[3]), colours[d[4] % 32, :] * 255, 5)
                            cv2.putText(frame, 'ID : %d' % (d[4]), (d[0] - 10, d[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)

                    if display:
                        frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
                        cv2.imshow("Frame", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
def main():
    # parameters
    global colours, img_size
    args = parse_args()
    videos_dir = args.videos_dir
    output_path = args.output_path
    no_display = args.no_display
    detect_interval = args.detect_interval  # keep a balance between performance and smoothness here
    margin = args.margin  # if faces are large in your video, a bigger margin makes tracking easier
    scale_rate = args.scale_rate  # a smaller value shrinks the input frames
    show_rate = args.show_rate  # a smaller value displays smaller frames
    face_score_threshold = args.face_score_threshold

    # create the output directory
    mkdir(output_path)
    # for display
    if not no_display:
        colours = np.random.rand(32, 3)

    # initialise the SORT tracker
    tracker = Sort()

    logger.info('Start tracking and extracting faces......')
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                              log_device_placement=False)) as sess:
            # create the three MTCNN face-detection networks
            pnet, rnet, onet = detect_face.create_mtcnn(sess, os.path.join(project_dir, "align"))
            # smallest face size MTCNN will detect
            minsize = 40
            # thresholds for the three networks
            threshold = [0.6, 0.7, 0.7]
            # scale factor
            factor = 0.709

            for filename in os.listdir(videos_dir):
                logger.info('Files to detect and track faces in: {}'.format(filename))
            for filename in os.listdir(videos_dir):
                suffix = filename.split('.')[1]
                if suffix != 'mp4' and suffix != 'avi':  # filter other file formats here if needed
                    continue
                video_name = os.path.join(videos_dir, filename)
                directoryname = os.path.join(output_path, filename.split('.')[0])
                logger.info('Video currently being processed: {}'.format(video_name))
                # create the frame capture; the argument is a file path, or 0 for the local camera
                cam = cv2.VideoCapture(video_name)
                ret, frame = cam.read()
                # create the video writer
                size = (frame.shape[1], frame.shape[0])
                video_writer = cv2.VideoWriter("output/minisize_40_07.avi",
                                               cv2.VideoWriter_fourcc(*'DIVX'), 24, size)
                # number of frames read so far
                c = 0
                # loop over every frame of the video
                while ret:
                    final_faces = []
                    addtional_attribute_list = []
                    # ret tells whether a frame was actually read (True/False)
                    ret, frame = cam.read()
                    if not ret:
                        logger.warning("ret false, no frame was read!")
                        break
                    if frame is None:
                        logger.warning("frame drop, the frame read is empty!")
                        break

                    # cv2.resize(src, dsize, dst=None, fx=None, fy=None, interpolation=None)
                    #   src: source image; dsize: output size
                    #   fx / fy: scale factors along the horizontal / vertical axis
                    #   interpolation: interpolation method
                    frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
                    # convert the frame to the RGB colour space
                    r_g_b_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                    # run detection and tracking every detect_interval frames
                    if c % detect_interval == 0:
                        img_size = np.asarray(frame.shape)[0:2]
                        # timestamp when MTCNN starts, used for timing
                        mtcnn_starttime = time()
                        # run MTCNN: returns (x1, y1, x2, y2, score) for every face candidate
                        # plus the corresponding landmarks
                        faces, points = detect_face.detect_face(r_g_b_frame, minsize, pnet, rnet, onet,
                                                                threshold, factor)
                        logger.info("MTCNN face detection took: {} s".format(
                            round(time() - mtcnn_starttime, 3)))
                        face_sums = faces.shape[0]
                        # if any faces were detected, crop each one and judge whether it is a side face
                        if face_sums > 0:
                            face_list = []
                            for i, item in enumerate(faces):
                                score = round(faces[i, 4], 6)
                                if score > face_score_threshold:
                                    det = np.squeeze(faces[i, 0:4])

                                    # face rectangle
                                    det[0] = np.maximum(det[0] - margin, 0)
                                    det[1] = np.maximum(det[1] - margin, 0)
                                    det[2] = np.minimum(det[2] + margin, img_size[1])
                                    det[3] = np.minimum(det[3] + margin, img_size[0])
                                    face_list.append(item)

                                    # face cropped
                                    bb = np.array(det, dtype=np.int32)
                                    # distance from the bounding box to the left edge of the frame
                                    bb_left = det[0]
                                    # distance from the bounding box to the top edge of the frame
                                    bb_top = det[1]

                                    # use 5 face landmarks to judge whether the face is frontal or a side face
                                    squeeze_points = np.squeeze(points[:, i])
                                    tolist = squeeze_points.tolist()
                                    facial_landmarks = []
                                    facial_landmarks_crap = []
                                    for j in range(5):
                                        item = [tolist[j], tolist[(j + 5)]]
                                        facial_landmarks.append(item)
                                        # landmark coordinates relative to the cropped face
                                        item_crap = [tolist[j] - bb_left, tolist[(j + 5)] - bb_top]
                                        facial_landmarks_crap.append(item_crap)
                                    # draw the landmark points on the face
                                    if args.face_landmarks:
                                        for (x, y) in facial_landmarks:
                                            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
                                    # copy the face region out of the frame
                                    cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :].copy()

                                    # height/width ratio and height/width variances of the five landmarks
                                    dist_rate, high_ratio_variance, width_rate = judge_side_face(
                                        np.array(facial_landmarks))

                                    # face additional attributes (index 0: face score; index 1: 0 for frontal face,
                                    # 1 for side face); facial_landmarks_crap is appended as an extra element
                                    item_list = [cropped, score, dist_rate, high_ratio_variance, width_rate,
                                                 facial_landmarks_crap]
                                    addtional_attribute_list.append(item_list)

                            # faces in this frame whose score exceeds the threshold
                            final_faces = np.array(face_list)

                    trackers = tracker.update(final_faces, img_size, directoryname,
                                              addtional_attribute_list, detect_interval)
                    c += 1
                    for d in trackers:
                        if not no_display:
                            d = d.astype(np.int32)
                            cv2.rectangle(frame, (d[0], d[1]), (d[2], d[3]), colours[d[4] % 32, :] * 255, 3)
                            if len(final_faces) != 0:
                                cv2.putText(frame, 'ID : %d DETECT' % (d[4]), (d[0] - 10, d[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)
                                cv2.putText(frame, 'DETECTOR', (5, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (1, 1, 1), 2)
                            else:
                                cv2.putText(frame, 'ID : %d' % (d[4]), (d[0] - 10, d[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, colours[d[4] % 32, :] * 255, 2)

                    # video_writer.write(frame)
                    data = {"type_code": type_code, "area_id": area_id}
                    file = {
                        "scene_img": ("scene_img.jpg", cv2.imencode(".jpg", frame)[1].tobytes(), "image/jpg")
                    }
                    res = requests.post(url=post_scene_url, files=file, data=data)
                    logger.info("Detection scene sent to the server, response code: {}".format(res))

                    if not no_display:
                        frame = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
                        cv2.imshow("Frame", frame)
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
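# --------------------------------------------------------------------------
# NOTE: none of the snippets above show how they are launched. A typical entry
# point for the argument-less main() variants would look like the sketch below;
# this guard is an assumption, not part of the original sources.
if __name__ == '__main__':
    main()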