def run(self, args):
    input_stream = Visualizer.open_input_stream(args.input)
    if input_stream is None or not input_stream.isOpened():
        log.error("Cannot open input stream: %s" % args.input)
        return  # bail out instead of continuing with a dead stream

    fps = input_stream.get(cv2.CAP_PROP_FPS)
    frame_size = (int(input_stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(input_stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    self.frame_count = int(input_stream.get(cv2.CAP_PROP_FRAME_COUNT))
    log.info("Input stream info: %d x %d @ %.2f FPS" %
             (frame_size[0], frame_size[1], fps))
    output_stream = Visualizer.open_output_stream(args.output, fps, frame_size)

    cameraImg = input_stream.read()[1]
    self.textureCoords = utils.getFaceTextureCoords(
        self.textureImg, self.mean3DShape, self.blendshapes,
        self.idxs2D, self.idxs3D, self.detector, self.predictor)
    self.renderer = FaceRendering.FaceRenderer(cameraImg, self.textureImg,
                                               self.textureCoords, self.mesh)

    self.process(input_stream, output_stream)

    # Release resources
    if output_stream:
        output_stream.release()
    if input_stream:
        input_stream.release()
    cv2.destroyAllWindows()
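# The Visualizer.open_input_stream / open_output_stream helpers are not part of
# this snippet. A minimal sketch, assuming they are thin wrappers around
# cv2.VideoCapture / cv2.VideoWriter with signatures inferred from the call
# sites in run() above:
import cv2

class Visualizer:
    @staticmethod
    def open_input_stream(path):
        # accept a webcam index ("0") or a file path / URL
        source = int(path) if str(path).isdigit() else path
        stream = cv2.VideoCapture(source)
        return stream if stream.isOpened() else None

    @staticmethod
    def open_output_stream(path, fps, frame_size):
        if not path:
            return None  # no output requested -> display-only mode
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        return cv2.VideoWriter(path, fourcc, fps, frame_size)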
def process_video(self, in_filename, out_filename, face_filename, keep_audio=True):
    # extract the audio clip from the source video
    if keep_audio:
        clip = VideoFileClip(in_filename)
        clip.audio.write_audiofile("./temp/src_audio.mp3", verbose=False)

    predictor_path = "./models/shape_predictor_68_face_landmarks.dat"
    # predictor_path = "./models/shape_predictor_81_face_landmarks.dat"

    # The smaller this value gets, the faster the detection will work.
    # If it is too small, the user's face might not be detected.
    maxImageSizeForDetection = 320

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)
    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "./models/candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    # open the source video
    vidcap = cv2.VideoCapture(in_filename)

    # read some parameters from the input video
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    frames_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # create a video writer for the output
    res_filename = "./output/" + out_filename
    vidwriter = cv2.VideoWriter(res_filename,
                                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                fps, (width, height))

    cameraImg = vidcap.read()[1]
    textureImg = cv2.imread(face_filename)

    textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape,
                                               blendshapes, idxs2D, idxs3D,
                                               detector, predictor)
    renderer = FaceRendering.FaceRenderer(cameraImg, textureImg,
                                          textureCoords, mesh)

    destShapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
    destShape = destShapes2D[0]
    modelParams = projectionModel.getInitialParameters(
        mean3DShape[:, idxs3D], destShape[:, idxs2D])

    # 3D model parameter optimization
    modelParams = NonLinearLeastSquares.GaussNewton(
        modelParams, projectionModel.residual, projectionModel.jacobian,
        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
         destShape[:, idxs2D]),
        verbose=0)

    # render the model to an image
    destShape3D = utils.getShape3D(mean3DShape, blendshapes, modelParams)
    destRenderedImg = renderer.render(destShape3D)

    self.progressBar.setRange(0, frames_count - 1)

    # iterate over the frames and apply the face swap
    for i in tqdm(range(frames_count - 1)):
        success, cameraImg = vidcap.read()
        self.progressBar.setValue(i + 1)
        if not success:  # no frames left => stop
            break

        shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
        newImg = cameraImg
        try:
            if shapes2D is not None:
                for shape2D in shapes2D:
                    # 3D model parameter initialization
                    modelParams = projectionModel.getInitialParameters(
                        mean3DShape[:, idxs3D], shape2D[:, idxs2D])

                    # 3D model parameter optimization
                    modelParams = NonLinearLeastSquares.GaussNewton(
                        modelParams, projectionModel.residual,
                        projectionModel.jacobian,
                        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                         shape2D[:, idxs2D]),
                        verbose=0)

                    # render the model to an image
                    shape3D = utils.getShape3D(mean3DShape, blendshapes,
                                               modelParams)
                    renderedImg = renderer.render(shape3D)

                    # blend the rendered face with the image
                    mask = np.copy(renderedImg[:, :, 0])
                    mask1 = np.copy(destRenderedImg[:, :, 0])
                    renderedImg = ImageProcessing.colorTransfer(
                        cameraImg, renderedImg, mask)
                    # newImg = ImageProcessing.blendImages(renderedImg, cameraImg, mask)
                    newImg = ImageProcessing.blendImages0(
                        renderedImg, cameraImg, mask, mask1)
        except Exception:
            # if the swap fails on this frame, keep the original frame
            pass

        vidwriter.write(newImg)

    # release video capture and writer
    vidcap.release()
    vidwriter.release()
    renderer.release()

    # apply the audio clip to the generated video
    if keep_audio:
        video = VideoFileClip(res_filename)
        video.write_videofile(out_filename, audio="./temp/src_audio.mp3",
                              progress_bar=False, verbose=False)
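# Hypothetical usage sketch (the host class name and file paths below are
# placeholders, not part of the repo); process_video also expects a Qt
# progressBar attribute on self:
if __name__ == "__main__":
    app = FaceSwapWindow()                  # assumed host class
    app.process_video("./input/src.mp4",    # source clip to rewrite
                      "proc_video.avi",     # written under ./output/
                      "./faces/actor.jpg",  # still image supplying the face
                      keep_audio=True)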
def guide_facechange():
    def tmp():
        print("")

    t = threading.Timer(GUIDE_SHOW_TIME, tmp)
    t.start()
    t_wait = threading.Timer(GUIDE_WAIT_TIME, tmp)
    t_wait.start()

    print("Press T to draw the keypoints and the 3D model")
    print("Press W to start recording to a video file")
    print("Press R to restart")
    print("Press Q or ESC to Quit")

    modelParams = None
    lockedTranslation = False
    drawOverlay = False
    global BACKGROUND_VID_PATH_NUM
    #cap = cv2.VideoCapture(VIDEO_CAPTURE_CAM_NUM)
    cap = cv2.VideoCapture(BACKGROUND_VID_PATH[BACKGROUND_VID_PATH_NUM])
    writer = None
    cameraImg = cap.read()[1]

    # camera frame that supplies the face to be swapped in
    textureImg = cv2.VideoCapture(VIDEO_CAPTURE_CAM_NUM).read()[1]
    #print("ad video shape:      ", cameraImg.shape[1], cameraImg.shape[0])
    #print("camera capture shape:", textureImg.shape[1], textureImg.shape[0])

    ###### face detection with guide
    cap_guide_cam = cv2.VideoCapture(VIDEO_CAPTURE_CAM_NUM)
    if not cap_guide_cam.isOpened():
        print("Unable to read camera feed")

    frame_width = int(cap_guide_cam.get(3))
    frame_height = int(cap_guide_cam.get(4))

    msg = "match your face"  # renamed from `str` to avoid shadowing the builtin
    msg2 = "O"
    msg3 = "ATTENTION"

    while True:
        ret, frame = cap_guide_cam.read()
        frame_org = frame
        cv2.putText(frame, msg, (int(frame_width / 3), int(frame_height / 6)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 600),
                    (0, 0, 0), int(frame_width / 300))
        cv2.putText(frame, msg2, (int(frame_width / 3), int(frame_width / 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 60),
                    (0, 0, 255), int(frame_width / 300))
        cv2.putText(frame, msg3,
                    (int((frame_width * 2) / 3), int((frame_height * 2) / 3)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 650),
                    (0, 0, 0), int(frame_width / 300))
        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, 1)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not t_wait.is_alive():
            # run the detector on the raw camera frame to look for a face
            dets = detector(frame_org, 1)
            if len(dets) > 0:
                print("detected")
                break
            else:
                print("now detecting")
                if not t.is_alive():
                    video()

    # reconstruct the captured face in 3D and composite it into the video
    textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape,
                                               blendshapes, idxs2D, idxs3D,
                                               detector, predictor)
    # render the captured face data onto the face in the video
    renderer = FaceRendering.FaceRenderer(cameraImg, textureImg,
                                          textureCoords, mesh)

    doProcess = False
    meanTime = [[0] * 4 for i in range(4)]
    while True:
        # capture a frame of the background video
        cameraImg = cap.read()[1]
        shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
        # process only every other frame to save time
        doProcess = not doProcess
        if not doProcess:
            continue

        if shapes2D is not None:
            for shape2D in shapes2D:
                start = timeit.default_timer()
                # 3D model parameter initialization
                modelParams = projectionModel.getInitialParameters(
                    mean3DShape[:, idxs3D], shape2D[:, idxs2D])
                stop = timeit.default_timer()
                meanTime[0][0] += stop - start
                meanTime[0][1] += 1
                #print(1, float(meanTime[0][0] / meanTime[0][1]))

                start = timeit.default_timer()
                # 3D model parameter optimization
                modelParams = NonLinearLeastSquares.GaussNewton(
                    modelParams, projectionModel.residual,
                    projectionModel.jacobian,
                    ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                     shape2D[:, idxs2D]),
                    verbose=0)
                stop = timeit.default_timer()
                meanTime[1][0] += stop - start
                meanTime[1][1] += 1
                #print(2, float(meanTime[1][0] / meanTime[1][1]))

                start = timeit.default_timer()
                # render the model to an image  # TODO: refine
                shape3D = utils.getShape3D(mean3DShape, blendshapes, modelParams)
                renderedImg = renderer.render(shape3D)
                stop = timeit.default_timer()
                meanTime[2][0] += stop - start
                meanTime[2][1] += 1
                #print(3, float(meanTime[2][0] / meanTime[2][1]))

                start = timeit.default_timer()
                # blend the rendered face with the image
                mask = np.copy(renderedImg[:, :, 0])
                renderedImg = ImageProcessing.colorTransfer(cameraImg,
                                                            renderedImg, mask)
                cameraImg = ImageProcessing.blendImages(renderedImg,
                                                        cameraImg, mask)
                stop = timeit.default_timer()
                meanTime[3][0] += stop - start
                meanTime[3][1] += 1
                #print(4, float(meanTime[3][0] / meanTime[3][1]))

                # draw the mesh and keypoints; toggled with 't'
                if drawOverlay:
                    drawPoints(cameraImg, shape2D.T)
                    drawProjectedShape(cameraImg, [mean3DShape, blendshapes],
                                       projectionModel, mesh, modelParams,
                                       lockedTranslation)

        if writer is not None:
            writer.write(cameraImg)

        cv2.namedWindow("image", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("image", cv2.WND_PROP_FULLSCREEN, 1)
        cv2.imshow('image', cameraImg)

        key = cv2.waitKey(1)
        if key == 27 or key == ord('q'):
            break
        if key == ord('t'):
            drawOverlay = not drawOverlay
        if key == ord('r'):
            cv2.destroyAllWindows()
            video()
        # '1'..'5' switch the background video and restart the guide
        if key in (ord('1'), ord('2'), ord('3'), ord('4'), ord('5')):
            cv2.destroyAllWindows()
            BACKGROUND_VID_PATH_NUM = key - ord('1')
            guide_facechange()
            break
        if key == ord('w'):
            if writer is None:
                print("Starting video writer")
                writer = cv2.VideoWriter(
                    SAVE_VID_PATH,
                    cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 13,
                    (cameraImg.shape[1], cameraImg.shape[0]))
                if writer.isOpened():
                    print("Writer successfully opened")
                else:
                    writer = None
                    print("Writer opening failed")
            else:
                print("Stopping video writer")
                writer.release()
                writer = None

    cap.release()
    cap_intro_vid.release()
    cap_guide_cam.release()
    cv2.destroyAllWindows()
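# drawPoints / drawProjectedShape are drawing utilities from the upstream
# FaceSwap demo and are not shown in this section. A minimal drawPoints-style
# sketch (assuming it receives an N x 2 array of pixel coordinates, as the
# shape2D.T call site suggests) might look like:
import numpy as np

def drawPoints(img, points, color=(0, 255, 0)):
    for x, y in np.asarray(points, dtype=int):
        cv2.circle(img, (int(x), int(y)), 2, color, -1)  # one filled dot per landmark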
predictor_path = "shape_predictor_68_face_landmarks.dat"
image_name = "Trump.jpg"
maxImageSizeForDetection = 400

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel("candide.npz")
idxs3D, idxs2D = utils.refine_idxs(idxs3D, idxs2D)
projectionModel = models.OrthographicProjectionBlendshapes(blendshapes.shape[0])

# Init 3D model
cap = cv2.VideoCapture(0)
cameraImg = cap.read()[1]
textureImg = cv2.imread(image_name)
# integer division: cv2.resize expects integer sizes under Python 3
textureImg = cv2.resize(textureImg, (textureImg.shape[1] // 4, textureImg.shape[0] // 4))
cameraImg = cv2.resize(cameraImg, (cameraImg.shape[1] // 4, cameraImg.shape[0] // 4))
textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape, blendshapes,
                                           idxs2D, idxs3D, detector, predictor)
renderer = FaceRendering.FaceRenderer(cameraImg, textureImg, textureCoords, mesh)

# Init GAN model
graph = gan_func.load_graph("frozen_model_500.pb")
image_tensor = graph.get_tensor_by_name('image_tensor:0')
output_tensor = graph.get_tensor_by_name('generate_output/output:0')
sess = tf.Session(graph=graph)

# Init SR model
sr_model = torch.load("model_scale_3_batch_4_epoch_500.pth")

# Start running
count = 0
while True:
    # Skip frame
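    # (The snippet ends at the top of the loop. A hedged sketch of a possible
    # body follows; the glue code here is an assumption, only the models
    # initialized above come from the snippet itself.)
    count += 1
    if count % 2 == 0:
        cap.grab()  # drop every other frame to keep the demo responsive
        continue
    cameraImg = cap.read()[1]
    cameraImg = cv2.resize(cameraImg, (cameraImg.shape[1] // 4,
                                       cameraImg.shape[0] // 4))
    # ...dlib fit and renderer.render(shape3D) as in the other variants...
    # GAN pass: run the composited frame through the frozen graph via the two
    # tensors fetched above.
    gan_out = sess.run(output_tensor, feed_dict={image_tensor: cameraImg})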
def guide_facechange():
    def tmp():
        print("")

    t = threading.Timer(GUIDE_SHOW_TIME, tmp)
    t.start()

    print("Press T to draw the keypoints and the 3D model")
    print("Press R to start recording to a video file")

    # loading the keypoint detection model, the image and the 3D model
    predictor_path = "../shape_predictor_68_face_landmarks.dat"
    image_name = "../data/jolie.jpg"

    # The smaller this value gets, the faster the detection will work.
    # If it is too small, the user's face might not be detected.
    maxImageSizeForDetection = 320

    # detector that finds faces in the camera feed
    detector = dlib.get_frontal_face_detector()
    # point set that locates the important facial landmarks, such as the
    # corners of the mouth and eyes and the tip of the nose
    predictor = dlib.shape_predictor(predictor_path)

    # candide = source of the 3D face model
    # mean3DShape: list of vertices for the neutral state of the face
    # blendshapes: displacements that can be added to the neutral face,
    #   e.g. a smile or raised eyebrows; derived from the Animation Units
    #   defined in Candide
    # mesh: the original mesh, provided by Candide as a list of faces
    # idxs3D, idxs2D: indices of the corresponding points between the Candide
    #   model (idxs3D) and the face alignment point set (idxs2D)
    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "../candide.npz")

    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    modelParams = None
    lockedTranslation = False
    drawOverlay = False
    #cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture("../ROEM 2014 Spring SUZY 320p.mp4")
    writer = None
    cameraImg = cap.read()[1]

    # camera frame that supplies the face to be swapped in
    textureImg = cv2.VideoCapture(0).read()[1]  #cv2.imread(image_name)
    print("ad video shape:      ", cameraImg.shape[1], cameraImg.shape[0])
    print("camera capture shape:", textureImg.shape[1], textureImg.shape[0])

    ###### face detection with guide
    cap_guide_cam = cv2.VideoCapture(0)
    if not cap_guide_cam.isOpened():
        print("Unable to read camera feed")

    frame_width = int(cap_guide_cam.get(3))
    frame_height = int(cap_guide_cam.get(4))

    msg = "match your face"  # renamed from `str` to avoid shadowing the builtin
    msg2 = "O"
    msg3 = "ATTENTION"

    while True:
        ret, frame = cap_guide_cam.read()
        frame_org = frame
        cv2.putText(frame, msg, (int(frame_width / 3), int(frame_height / 6)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 600),
                    (0, 0, 0), int(frame_width / 300))
        cv2.putText(frame, msg2, (int(frame_width / 3), int(frame_width / 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 60),
                    (0, 0, 255), int(frame_width / 300))
        cv2.putText(frame, msg3,
                    (int((frame_width * 2) / 3), int((frame_height * 2) / 3)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 650),
                    (0, 0, 0), int(frame_width / 300))
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # run the detector on the raw camera frame to look for a face
        dets = detector(frame_org, 1)
        if len(dets) > 0:
            print("detected")
            break
        else:
            print("now detecting")
            if not t.is_alive():
                video()
                break

    # reconstruct the captured face in 3D and composite it into the video
    textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape,
                                               blendshapes, idxs2D, idxs3D,
                                               detector, predictor)
    # render the captured face data onto the face in the video
    renderer = FaceRendering.FaceRenderer(cameraImg, textureImg,
                                          textureCoords, mesh)

    doProcess = False
    meanTime = [[0] * 4 for i in range(4)]
    while True:
        # capture a frame of the background video
        cameraImg = cap.read()[1]
        shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
        # process only every other frame to save time
        doProcess = not doProcess
        if not doProcess:
            continue

        if shapes2D is not None:
            for shape2D in shapes2D:
                start = timeit.default_timer()
                # 3D model parameter initialization
                modelParams = projectionModel.getInitialParameters(
                    mean3DShape[:, idxs3D], shape2D[:, idxs2D])
                stop = timeit.default_timer()
                meanTime[0][0] += stop - start
                meanTime[0][1] += 1
                #print(1, float(meanTime[0][0] / meanTime[0][1]))

                start = timeit.default_timer()
                # 3D model parameter optimization
                modelParams = NonLinearLeastSquares.GaussNewton(
                    modelParams, projectionModel.residual,
                    projectionModel.jacobian,
                    ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                     shape2D[:, idxs2D]),
                    verbose=0)
                stop = timeit.default_timer()
                meanTime[1][0] += stop - start
                meanTime[1][1] += 1
                #print(2, float(meanTime[1][0] / meanTime[1][1]))

                start = timeit.default_timer()
                # render the model to an image  # TODO: refine
                shape3D = utils.getShape3D(mean3DShape, blendshapes, modelParams)
                renderedImg = renderer.render(shape3D)
                stop = timeit.default_timer()
                meanTime[2][0] += stop - start
                meanTime[2][1] += 1
                #print(3, float(meanTime[2][0] / meanTime[2][1]))

                start = timeit.default_timer()
                # blend the rendered face with the image
                mask = np.copy(renderedImg[:, :, 0])
                renderedImg = ImageProcessing.colorTransfer(cameraImg,
                                                            renderedImg, mask)
                cameraImg = ImageProcessing.blendImages(renderedImg,
                                                        cameraImg, mask)
                stop = timeit.default_timer()
                meanTime[3][0] += stop - start
                meanTime[3][1] += 1
                #print(4, float(meanTime[3][0] / meanTime[3][1]))

                # draw the mesh and keypoints; toggled with 't'
                if drawOverlay:
                    drawPoints(cameraImg, shape2D.T)
                    drawProjectedShape(cameraImg, [mean3DShape, blendshapes],
                                       projectionModel, mesh, modelParams,
                                       lockedTranslation)

        if writer is not None:
            writer.write(cameraImg)

        cv2.namedWindow("image", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("image", cv2.WND_PROP_FULLSCREEN, 1)
        cv2.imshow('image', cameraImg)

        key = cv2.waitKey(1)
        if key == 27 or key == ord('q'):
            break
        if key == ord('t'):
            drawOverlay = not drawOverlay
        if key == ord('r'):
            cv2.destroyAllWindows()
            video()
        if key == ord('w'):
            if writer is None:
                print("Starting video writer")
                writer = cv2.VideoWriter(
                    "../out.avi",
                    cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 13,
                    (cameraImg.shape[1], cameraImg.shape[0]))
                if writer.isOpened():
                    print("Writer successfully opened")
                else:
                    writer = None
                    print("Writer opening failed")
            else:
                print("Stopping video writer")
                writer.release()
                writer = None

    cap.release()
    cap_intro_vid.release()
    cap_guide_cam.release()
    cv2.destroyAllWindows()
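# utils.getFaceKeypoints is called throughout with a maxImageSizeForDetection
# argument. A plausible sketch of what it does (an assumption, not the repo's
# exact code): detect on a downscaled copy for speed, then localize the 68
# landmarks on the full-resolution frame.
import cv2
import dlib
import numpy as np

def getFaceKeypoints(img, detector, predictor, maxImgSizeForDetection=640):
    scale = min(1.0, maxImgSizeForDetection / float(max(img.shape[:2])))
    small = cv2.resize(img, (0, 0), fx=scale, fy=scale) if scale < 1.0 else img
    dets = detector(small, 1)
    if len(dets) == 0:
        return None
    shapes2D = []
    for det in dets:
        # rescale the detection box back to full resolution
        rect = dlib.rectangle(int(det.left() / scale), int(det.top() / scale),
                              int(det.right() / scale), int(det.bottom() / scale))
        shape = predictor(img, rect)
        pts = np.array([[p.x, p.y] for p in shape.parts()]).T  # shape (2, 68)
        shapes2D.append(pts)
    return shapes2D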
textureCoords1 = None
print("-----")
print("Press SPACE to scan the first face.")
print("-----")
while textureCoords1 is None:
    cameraImg = cap.read()[1]
    cv2.imshow('Face Swapper', cameraImg)
    key = cv2.waitKey(1)
    if key % 256 == 32:  # SPACE
        try:
            textureCoords1 = utils.getFaceTextureCoords(
                cameraImg, mean3DShape, blendshapes, idxs2D, idxs3D,
                detector, predictor)
            textureImg1 = cameraImg
            print("First face scanned.")
        except Exception:  # no face found in this frame
            print("Please try again.")

textureImg2 = None
textureCoords2 = None
print("-----")
print("Press SPACE to scan the second face.")
print("-----")
while textureCoords2 is None:
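    # (continuation filled in by symmetry with the first scan loop above)
    cameraImg = cap.read()[1]
    cv2.imshow('Face Swapper', cameraImg)
    key = cv2.waitKey(1)
    if key % 256 == 32:  # SPACE
        try:
            textureCoords2 = utils.getFaceTextureCoords(
                cameraImg, mean3DShape, blendshapes, idxs2D, idxs3D,
                detector, predictor)
            textureImg2 = cameraImg
            print("Second face scanned.")
        except Exception:
            print("Please try again.")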
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)

mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel("../candide.npz")
projectionModel = models.OrthographicProjectionBlendshapes(blendshapes.shape[0])

modelParams = None
lockedTranslation = False
drawOverlay = False
cap = cv2.VideoCapture(0)
writer = None
cameraImg = cap.read()[1]

textureImg = cv2.imread(image_name)
textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape, blendshapes,
                                           idxs2D, idxs3D, detector, predictor)
renderer = FaceRendering.FaceRenderer(cameraImg, textureImg, textureCoords, mesh)

while True:
    cameraImg = cap.read()[1]
    shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                      maxImageSizeForDetection)

    if shapes2D is not None:
        for shape2D in shapes2D:
            # 3D model parameter initialization
            modelParams = projectionModel.getInitialParameters(
                mean3DShape[:, idxs3D], shape2D[:, idxs2D])

            # 3D model parameter optimization
            modelParams = NonLinearLeastSquares.GaussNewton(
                modelParams, projectionModel.residual, projectionModel.jacobian,
                ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                 shape2D[:, idxs2D]),
                verbose=0)

            # rendering the model to an image
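            # (The snippet cuts off here; in the other variants in this section
            # the loop continues with the same render-and-blend steps:)
            shape3D = utils.getShape3D(mean3DShape, blendshapes, modelParams)
            renderedImg = renderer.render(shape3D)
            mask = np.copy(renderedImg[:, :, 0])
            renderedImg = ImageProcessing.colorTransfer(cameraImg, renderedImg, mask)
            cameraImg = ImageProcessing.blendImages(renderedImg, cameraImg, mask)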
maxImageSizeForDetection = 320

#detector = dlib.get_frontal_face_detector()
#predictor = dlib.shape_predictor(predictor_path)
mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel("candide.npz")
projectionModel = models.OrthographicProjectionBlendshapes(blendshapes.shape[0])

modelParams = None
lockedTranslation = False
drawOverlay = False
cap = cv2.VideoCapture(0)
writer = None
cameraImg = cap.read()[1]

textureImg = cv2.imread(image_name)
textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape, blendshapes,
                                           idxs2D, idxs3D, openpose)
renderer = FaceRendering.FaceRenderer(cameraImg, textureImg, textureCoords, mesh)

#arr, arr2, output_image = openpose.forward(img, False)
while True:
    cameraImg = cap.read()[1]
    cameraImg = cv2.flip(cameraImg, 1)
    arr, shapes2D, output_image = openpose.forward(cameraImg, False)
    shapes2D = shapes2D[:, :, :2]  # keep only the (x, y) columns, drop confidence

    if shapes2D is not None:
        for shape2D in shapes2D:
            # 3D model parameter initialization
            count = 0
            shape2D = shape2D.T
            # discard keypoints that fall outside the frame
            for x in shape2D[0]:
                if x <= 0.0 or x >= cameraImg.shape[1]:
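                    # (snippet ends mid-check; a plausible completion, flagged
                    # as an assumption: count out-of-frame keypoints and skip
                    # the face if any are found)
                    count += 1
            if count > 0:
                continue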