def __init__(self, master, poser: Poser, face_detector, landmark_locator, video_capture,
             torch_device: torch.device):
    self.master = master
    self.poser = poser
    self.face_detector = face_detector
    self.landmark_locator = landmark_locator
    self.video_capture = video_capture
    self.torch_device = torch_device
    self.head_pose_solver = HeadPoseSolver()

    self.master.title("Puppeteer")
    self.master.protocol("WM_DELETE_WINDOW", self.on_closing)

    top_frame = Frame(self.master)
    top_frame.pack()

    if True:  # source image panel
        source_image_frame = Frame(top_frame, width=256, height=256)
        source_image_frame.pack_propagate(0)
        source_image_frame.pack(side=LEFT)

        self.source_image_label = Label(source_image_frame, text="Nothing yet!")
        self.source_image_label.pack(fill=BOTH, expand=True)

    if True:  # webcam preview panel
        control_frame = Frame(top_frame, width=256, height=192)
        control_frame.pack_propagate(0)
        control_frame.pack(side=LEFT)

        self.video_capture_label = Label(control_frame, text="Nothing yet!")
        self.video_capture_label.pack(fill=BOTH, expand=True)

    if True:  # posed output panel
        posed_image_frame = Frame(top_frame, width=256, height=256)
        posed_image_frame.pack_propagate(0)
        posed_image_frame.pack(side=LEFT, fill='y')

        self.posed_image_label = Label(posed_image_frame, text="Nothing yet!")
        self.posed_image_label.pack(fill=BOTH, expand=True)

    bottom_frame = Frame(self.master)
    bottom_frame.pack(fill='x')

    self.load_source_image_button = Button(bottom_frame, text="Load Image ...", relief=GROOVE,
                                           command=self.load_image)
    self.load_source_image_button.pack(fill='x')

    self.pose_size = len(self.poser.pose_parameters())
    self.source_image = None
    self.posed_image = None
    self.current_pose = None
    self.last_pose = None

    # Schedule the first update; pass the method itself, not the result of
    # calling it, so Tk invokes update_image after the delay.
    self.master.after(1000 // 60, self.update_image)
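# Launch sketch for the webcam variant above, assuming this __init__ belongs to
# the PuppeteerApp class used throughout this section. The poser factory and
# ../data model paths are taken from the headless variant below; the wiring is
# illustrative, not this repo's official entry point.
if __name__ == "__main__":
    import cv2
    import dlib
    import torch
    from tkinter import Tk

    cuda = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    poser = MorphRotateCombinePoser256Param6(
        morph_module_spec=FaceMorpherSpec(),
        morph_module_file_name="../data/face_morpher.pt",
        rotate_module_spec=TwoAlgoFaceRotatorSpec(),
        rotate_module_file_name="../data/two_algo_face_rotator.pt",
        combine_module_spec=CombinerSpec(),
        combine_module_file_name="../data/combiner.pt",
        device=cuda)
    face_detector = dlib.get_frontal_face_detector()
    landmark_locator = dlib.shape_predictor("../data/shape_predictor_68_face_landmarks.dat")
    video_capture = cv2.VideoCapture(0)  # default webcam

    root = Tk()
    PuppeteerApp(root, poser, face_detector, landmark_locator, video_capture, cuda)
    root.mainloop()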
def __init__(self):
    self.torch_device = cuda
    self.poser = MorphRotateCombinePoser256Param6(
        morph_module_spec=FaceMorpherSpec(),
        morph_module_file_name="../data/face_morpher.pt",
        rotate_module_spec=TwoAlgoFaceRotatorSpec(),
        rotate_module_file_name="../data/two_algo_face_rotator.pt",
        combine_module_spec=CombinerSpec(),
        combine_module_file_name="../data/combiner.pt",
        device=cuda)
    # self.face_detector = dlib.get_frontal_face_detector()
    # self.landmark_locator = dlib.shape_predictor("../data/shape_predictor_68_face_landmarks.dat")
    self.head_pose_solver = HeadPoseSolver()
    self.pose_size = len(self.poser.pose_parameters())
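# Every variant in this section maps head rotation and facial ratios onto the
# same six-parameter pose vector consumed by Poser.pose(). A standalone sketch
# of that mapping; the helper name and plain-float inputs are hypothetical, but
# the arithmetic mirrors the update_image code below.
from typing import Sequence
import torch

def build_pose_vector(euler_angles: Sequence[float], left_eye: float,
                      right_eye: float, mouth: float, device=None) -> torch.Tensor:
    """euler_angles: (pitch, yaw, roll) in degrees; eye/mouth ratios in [0, 1]."""
    clamp = lambda v: max(min(v, 1.0), -1.0)
    pose = torch.zeros(6, device=device)
    pose[0] = clamp(-euler_angles[0] / 15.0)  # +/-15 degrees maps to full deflection
    pose[1] = clamp(-euler_angles[1] / 15.0)
    pose[2] = clamp(euler_angles[2] / 15.0)
    pose[3] = 1 - left_eye   # 1.0 = left eye fully closed
    pose[4] = 1 - right_eye  # 1.0 = right eye fully closed
    pose[5] = mouth          # 1.0 = mouth fully open
    return pose.unsqueeze(dim=0)  # add the batch dimension Poser.pose() expects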
class PuppeteerApp:
    def __init__(self, dst_image, poser: Poser, face_detector, landmark_locator,
                 torch_device: torch.device):
        self.poser = poser
        self.face_detector = face_detector
        self.landmark_locator = landmark_locator
        # self.video_capture = video_capture
        self.torch_device = torch_device
        self.head_pose_solver = HeadPoseSolver()
        self.pose_size = len(self.poser.pose_parameters())
        self.source_image = None
        self.posed_image = None
        self.current_pose = None
        self.last_pose = None
        self.load_image_from_file(dst_image)

    def load_image_from_file(self, file_name):
        self.source_image = extract_pytorch_image_from_filelike(file_name).to(
            self.torch_device).unsqueeze(dim=0)

    def update_image(self, frame):
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.face_detector(rgb_frame)
        euler_angles = None
        face_landmarks = None
        if len(faces) > 0:
            face_rect = faces[0]
            face_landmarks = self.landmark_locator(rgb_frame, face_rect)
            face_box_points, euler_angles = self.head_pose_solver.solve_head_pose(
                face_landmarks)
        resized_frame = cv2.flip(cv2.resize(rgb_frame, (192, 256)), 1)
        if euler_angles is not None and self.source_image is not None:
            self.current_pose = torch.zeros(self.pose_size, device=self.torch_device)
            # Head rotation: normalize each Euler angle by 15 degrees, clamp to [-1, 1].
            self.current_pose[0] = max(min(-euler_angles.item(0) / 15.0, 1.0), -1.0)
            self.current_pose[1] = max(min(-euler_angles.item(1) / 15.0, 1.0), -1.0)
            self.current_pose[2] = max(min(euler_angles.item(2) / 15.0, 1.0), -1.0)
            # Steady the pose with a 50/50 blend against the previous frame.
            if self.last_pose is None:
                self.last_pose = self.current_pose
            else:
                self.current_pose = self.current_pose * 0.5 + self.last_pose * 0.5
                self.last_pose = self.current_pose
            eye_min_ratio = 0.15
            eye_max_ratio = 0.25
            left_eye_normalized_ratio = compute_left_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[3] = 1 - left_eye_normalized_ratio
            right_eye_normalized_ratio = compute_right_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[4] = 1 - right_eye_normalized_ratio
            min_mouth_ratio = 0.02
            max_mouth_ratio = 0.3
            mouth_normalized_ratio = compute_mouth_normalized_ratio(
                face_landmarks, min_mouth_ratio, max_mouth_ratio)
            self.current_pose[5] = mouth_normalized_ratio
            self.current_pose = self.current_pose.unsqueeze(dim=0)
            posed_image = self.poser.pose(self.source_image,
                                          self.current_pose).detach().cpu()
            numpy_image = rgba_to_numpy_image(posed_image[0])
            # Note: rgba_to_numpy_image yields a 4-channel array; COLOR_BGRA2RGBA
            # may be the intended conversion here.
            numpy_image = cv2.cvtColor(numpy_image, cv2.COLOR_BGR2RGB)
            # pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)), mode='RGBA')
            return numpy_image
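# Usage sketch for the headless variant above: drive it from a plain webcam
# loop instead of Tk's event loop. The function name and window handling are
# illustrative; construction of `app` follows the dlib/poser wiring assumed
# elsewhere in this section.
def run_headless(app, capture):
    while True:
        ok, frame = capture.read()
        if not ok:
            break
        posed = app.update_image(frame)  # returns None when no face is detected
        if posed is not None:
            cv2.imshow("posed", posed)  # float image in [0, 1]
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    capture.release()
    cv2.destroyAllWindows()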
class PuppeteerApp:
    def __init__(self, master, poser: Poser, face_detector, landmark_locator, video_capture,
                 torch_device: torch.device, RGB):
        self.master = master
        self.poser = poser
        self.face_detector = face_detector
        self.landmark_locator = landmark_locator
        self.video_capture = video_capture
        self.torch_device = torch_device
        self.head_pose_solver = HeadPoseSolver()
        if RGB == 'R':
            self.BGcolor = "red"
        elif RGB == 'B':
            self.BGcolor = "blue"
        else:
            self.BGcolor = "green"
        self.master.title("Puppeteer")
        self.master.protocol("WM_DELETE_WINDOW", self.on_closing)
        top_frame = Frame(self.master)
        top_frame.pack()
        if True:
            source_image_frame = Frame(top_frame, width=256, height=256)
            source_image_frame.pack_propagate(0)
            # source_image_frame.pack(side=LEFT)
            self.source_image_label = Label(source_image_frame, text="Nothing yet!")
            # self.source_image_label.pack(fill=BOTH, expand=True)
        # if True:
        #     control_frame = Frame(top_frame, width=256, height=192)
        #     control_frame.pack_propagate(0)
        #     control_frame.pack(side=LEFT)
        #     self.video_capture_label = Label(control_frame, text="Nothing yet!")
        #     self.video_capture_label.pack(fill=BOTH, expand=True)
        if True:
            posed_image_frame = Frame(top_frame, width=256, height=256)
            posed_image_frame.pack_propagate(0)
            posed_image_frame.pack(side=LEFT, fill='y')
            self.posed_image_label = Label(posed_image_frame, text="Nothing yet!")
            self.posed_image_label.pack(fill=BOTH, expand=True)
        # bottom_frame = Frame(self.master)
        # bottom_frame.pack(fill='x')
        # self.load_source_image_button = Button(bottom_frame, text="Load Image ...", relief=GROOVE,
        #                                        command=self.load_image)
        # self.load_source_image_button.pack(fill='x')
        self.pose_size = len(self.poser.pose_parameters())
        self.source_image = None
        self.posed_image = None
        self.current_pose = None
        self.last_pose = None
        self.load_image()
        # Pass the method itself so Tk schedules it, rather than calling it here.
        self.master.after(1000 // 60, self.update_image)

    def load_image(self):
        file_name = filedialog.askopenfilename(filetypes=[("PNG", '*.png')],
                                               initialdir="data/illust")
        if len(file_name) > 0:
            self.load_image_from_file(file_name)

    def load_image_from_file(self, file_name):
        image = PhotoImage(file=file_name)
        if image.width() != self.poser.image_size() or image.height() != self.poser.image_size():
            message = "The loaded image has size %dx%d, but we require %dx%d." \
                      % (image.width(), image.height(),
                         self.poser.image_size(), self.poser.image_size())
            messagebox.showerror("Wrong image size!", message)
        self.source_image_label.configure(image=image, text="", background=self.BGcolor)
        self.source_image_label.image = image
        # self.source_image_label.pack()
        self.source_image = extract_pytorch_image_from_filelike(file_name).to(
            self.torch_device).unsqueeze(dim=0)

    def update_image(self):
        there_is_frame, frame = self.video_capture.read()
        if not there_is_frame:
            return
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.face_detector(rgb_frame)
        euler_angles = None
        face_landmarks = None
        if len(faces) > 0:
            face_rect = faces[0]
            face_landmarks = self.landmark_locator(rgb_frame, face_rect)
            face_box_points, euler_angles = self.head_pose_solver.solve_head_pose(
                face_landmarks)
            # self.draw_face_landmarks(rgb_frame, face_landmarks)
            # self.draw_face_box(rgb_frame, face_box_points)
        resized_frame = cv2.flip(cv2.resize(rgb_frame, (192, 256)), 1)
        pil_image = PIL.Image.fromarray(resized_frame, mode='RGB')
        photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
        # self.video_capture_label.configure(image=photo_image, text="")
        # self.video_capture_label.image = photo_image
        # self.video_capture_label.pack()
        if euler_angles is not None and self.source_image is not None:
            self.current_pose = torch.zeros(self.pose_size, device=self.torch_device)
            self.current_pose[0] = max(min(-euler_angles.item(0) / 15.0, 1.0), -1.0)
            self.current_pose[1] = max(min(-euler_angles.item(1) / 15.0, 1.0), -1.0)
            self.current_pose[2] = max(min(euler_angles.item(2) / 15.0, 1.0), -1.0)
            if self.last_pose is None:
                self.last_pose = self.current_pose
            else:
                self.current_pose = self.current_pose * 0.5 + self.last_pose * 0.5
                self.last_pose = self.current_pose
            eye_min_ratio = 0.15
            eye_max_ratio = 0.25
            left_eye_normalized_ratio = compute_left_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[3] = 1 - left_eye_normalized_ratio
            right_eye_normalized_ratio = compute_right_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[4] = 1 - right_eye_normalized_ratio
            min_mouth_ratio = 0.02
            max_mouth_ratio = 0.3
            mouth_normalized_ratio = compute_mouth_normalized_ratio(
                face_landmarks, min_mouth_ratio, max_mouth_ratio)
            self.current_pose[5] = mouth_normalized_ratio
            self.current_pose = self.current_pose.unsqueeze(dim=0)
            posed_image = self.poser.pose(self.source_image,
                                          self.current_pose).detach().cpu()
            numpy_image = rgba_to_numpy_image(posed_image[0])
            pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)), mode='RGBA')
            photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
            self.posed_image_label.configure(image=photo_image, text="",
                                             background=self.BGcolor)
            self.posed_image_label.image = photo_image
            self.posed_image_label.pack()
        self.master.after(1000 // 60, self.update_image)

    def on_closing(self):
        self.video_capture.release()
        self.master.destroy()
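# Launch sketch for the chroma-key variant above: the extra RGB argument picks
# a solid label background ('R' = red, 'B' = blue, anything else = green) so
# the posed output can be color-keyed in a compositor. The function and the
# `cuda` device global are assumptions, mirroring the earlier launch sketch.
def launch_chromakey_puppeteer(poser, face_detector, landmark_locator, rgb='G'):
    root = Tk()
    capture = cv2.VideoCapture(0)
    PuppeteerApp(root, poser, face_detector, landmark_locator, capture, cuda, rgb)
    root.mainloop()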
class PuppeteerApp:
    def __init__(self, master, poser: Poser, face_detector, landmark_locator, video_capture,
                 torch_device: torch.device, model):
        self.model = model
        self.master = master
        self.poser = poser
        self.face_detector = face_detector
        self.landmark_locator = landmark_locator
        self.video_capture = video_capture
        self.torch_device = torch_device
        self.head_pose_solver = HeadPoseSolver()
        self.isSuperResolution = StringVar(self.master)
        self.isSuperResolution.set("SR-False")
        self.SuperResolution = SuperResolutionModule()
        self.master.title("Puppeteer")
        self.master.protocol("WM_DELETE_WINDOW", self.on_closing)
        top_frame = Frame(self.master)
        top_frame.pack()
        if True:
            source_image_frame = Frame(top_frame, width=512, height=512)  # change box size *2
            source_image_frame.pack_propagate(0)
            source_image_frame.pack(side=LEFT)
            self.source_image_label = Label(source_image_frame, text="Nothing yet!")
            self.source_image_label.pack(fill=BOTH, expand=True)
        if True:
            control_frame = Frame(top_frame, width=512, height=192)  # change box size *2
            control_frame.pack_propagate(0)
            control_frame.pack(side=LEFT)
            self.video_capture_label = Label(control_frame, text="Nothing yet!")
            self.video_capture_label.pack(fill=BOTH, expand=True)
        if True:
            posed_image_frame = Frame(top_frame, width=512, height=512)  # change box size *2
            posed_image_frame.pack_propagate(0)
            posed_image_frame.pack(side=LEFT, fill='y')
            self.posed_image_label = Label(posed_image_frame, text="Nothing yet!")
            self.posed_image_label.pack(fill=BOTH, expand=True)
        bottom_frame = Frame(self.master)
        bottom_frame.pack(fill='x')
        self.super_resolution_check_button = OptionMenu(bottom_frame,
                                                        self.isSuperResolution,
                                                        " SR-True ",  # padded option strings
                                                        " SR-False ")
        self.super_resolution_check_button.pack(fill='x')
        self.load_source_image_button = Button(bottom_frame, text="Load Image ...",
                                               relief=GROOVE, command=self.load_image)
        self.load_source_image_button.pack(fill='x')
        self.generate_source_image_button = Button(bottom_frame, text="Generate Image ...",
                                                   relief=GROOVE, command=self.generate_image)
        self.generate_source_image_button.pack(fill='x')
        self.pose_size = len(self.poser.pose_parameters())
        self.source_image = None
        self.posed_image = None
        self.current_pose = None
        self.last_pose = None
        # Pass the method itself so Tk schedules it, rather than calling it here.
        self.master.after(1000 // 60, self.update_image)

    def load_image(self):
        file_name = filedialog.askopenfilename(initialdir="data/illust")
        if len(file_name) > 0:
            self.load_image_from_file(file_name)

    def generate_image(self):  # fixed typo: was "genereate_image"
        self.load_image_from_file(generate=True)

    @staticmethod
    def read_image(img_path):
        return cv2.imread(img_path, cv2.IMREAD_COLOR)

    @staticmethod
    def generate_waifus(steps=3):
        waifu = waifulabs.GenerateWaifu()
        if steps > 1:
            waifu = waifulabs.GenerateWaifu(seeds=waifu.seeds, step=2)
        if steps > 2:
            waifu = waifulabs.GenerateWaifu(seeds=waifu.seeds, step=3)
        export_waifu = np.array(PIL.Image.open(io.BytesIO(waifu.bytes)))
        export_waifu = cv2.cvtColor(export_waifu, cv2.COLOR_BGR2RGB)
        return export_waifu

    @staticmethod
    def crop_face(image, lbp_path):
        cascade = cv2.CascadeClassifier(lbp_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)
        faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                         minSize=(24, 24))
        if len(faces) != 1:
            raise Exception("{} anime faces identified in the image".format(len(faces)))
        for (x, y, w, h) in faces:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
            new_img = image_rgb[y:y + h, x:x + w]
        return new_img

    @staticmethod
    def prepare_image(cropped_head):
        resized_head = PIL.Image.fromarray(cropped_head).resize((128, 128)).convert('RGBA')
        output_image = PIL.Image.new("RGBA", (256, 256), (0, 0, 0, 0))
        output_image.paste(resized_head, (64, 64))
        return output_image

    def process_image(self, input="upload", img_path=None,
                      lbp_path='./data/lbpcascade_animeface.xml', steps=3):
        if input == 'upload':
            img = self.read_image(img_path)
        elif input == 'generate':
            img = self.generate_waifus(steps=steps)
        else:
            raise Exception("Unknown input argument")
        cropped_head = self.crop_face(img, lbp_path)
        resultant_image = self.prepare_image(cropped_head)
        return resultant_image

    def load_image_from_file(self, file_name=None, generate=False):
        if generate:
            image = self.process_image(input="generate")
        else:
            image = self.process_image(img_path=file_name)
        image.save('./data/illust/loaded_image.png')
        image = PhotoImage(file='./data/illust/loaded_image.png')
        if image.width() != self.poser.image_size() or image.height() != self.poser.image_size():
            message = "The loaded image has size %dx%d, but we require %dx%d." \
                      % (image.width(), image.height(),
                         self.poser.image_size(), self.poser.image_size())
            messagebox.showerror("Wrong image size!", message)
        self.source_image_label.configure(image=image, text="")
        self.source_image_label.image = image
        self.source_image_label.pack()
        self.source_image = extract_pytorch_image_from_filelike(
            './data/illust/loaded_image.png').to(self.torch_device).unsqueeze(dim=0)
        os.remove('./data/illust/loaded_image.png')

    def update_image(self):
        there_is_frame, frame = self.video_capture.read()
        # cv2.imwrite("my.png", frame)
        if not there_is_frame:
            return
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.face_detector(rgb_frame)
        euler_angles = None
        face_landmarks = None
        if len(faces) > 0:
            face_rect = faces[0]
            if self.model == "HOG":
                face_landmarks = self.landmark_locator(rgb_frame, face_rect)
            else:
                # CNN detections wrap the rectangle; unwrap it for the predictor.
                face_landmarks = self.landmark_locator(rgb_frame, face_rect.rect)
            face_box_points, euler_angles = self.head_pose_solver.solve_head_pose(
                face_landmarks)
            self.draw_face_landmarks(rgb_frame, face_landmarks)
            self.draw_face_box(rgb_frame, face_box_points)
        resized_frame = cv2.flip(cv2.resize(rgb_frame, (192, 256)), 1)
        pil_image = PIL.Image.fromarray(resized_frame, mode='RGB')
        photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
        self.video_capture_label.configure(image=photo_image, text="")
        self.video_capture_label.image = photo_image
        self.video_capture_label.pack()
        if euler_angles is not None and self.source_image is not None:
            self.current_pose = torch.zeros(self.pose_size, device=self.torch_device)
            self.current_pose[0] = max(min(-euler_angles.item(0) / 15.0, 1.0), -1.0)
            self.current_pose[1] = max(min(-euler_angles.item(1) / 15.0, 1.0), -1.0)
            self.current_pose[2] = max(min(euler_angles.item(2) / 15.0, 1.0), -1.0)
            if self.last_pose is None:
                self.last_pose = self.current_pose
            else:
                self.current_pose = self.current_pose * 0.5 + self.last_pose * 0.5
                self.last_pose = self.current_pose
            eye_min_ratio = 0.15
            eye_max_ratio = 0.25
            left_eye_normalized_ratio = compute_left_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[3] = 1 - left_eye_normalized_ratio
            right_eye_normalized_ratio = compute_right_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[4] = 1 - right_eye_normalized_ratio
            min_mouth_ratio = 0.02
            max_mouth_ratio = 0.3
            mouth_normalized_ratio = compute_mouth_normalized_ratio(
                face_landmarks, min_mouth_ratio, max_mouth_ratio)
            self.current_pose[5] = mouth_normalized_ratio
            self.current_pose = self.current_pose.unsqueeze(dim=0)
            # TODO Core of demo, compress this
            posed_image = self.poser.pose(self.source_image,
                                          self.current_pose).detach().cpu()
            numpy_image = rgba_to_numpy_image(posed_image[0])
            if self.isSuperResolution.get().strip() == "SR-True":
                pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)),
                                                mode='RGBA')
                pil_image = self.SuperResolution.inference(pil_image)
                pil_image = transparent_back(pil_image)
            else:
                pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)),
                                                mode='RGBA')
            photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
            self.posed_image_label.configure(image=photo_image, text="")
            self.posed_image_label.image = photo_image
            self.posed_image_label.pack()
        self.master.after(1000 // 60, self.update_image)

    def on_closing(self):
        self.video_capture.release()
        self.master.destroy()

    def draw_face_box(self, frame, face_box_points):
        line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
                      [4, 5], [5, 6], [6, 7], [7, 4],
                      [0, 4], [1, 5], [2, 6], [3, 7]]
        for start, end in line_pairs:
            cv2.line(frame, face_box_points[start], face_box_points[end],
                     (255, 0, 0), thickness=2)

    def draw_face_landmarks(self, frame, face_landmarks):
        for i in range(68):
            part = face_landmarks.part(i)
            x = part.x
            y = part.y
            cv2.rectangle(frame, (x - 1, y - 1), (x + 1, y + 1), (0, 255, 0), thickness=2)
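# The update loop above steadies head rotation with an equal-weight blend of
# the current and previous pose. A generalized sketch of that smoothing with
# a tunable weight; alpha=0.5 reproduces the behavior above, and the helper
# name is hypothetical.
from typing import Optional, Tuple
import torch

def smooth_pose(current: torch.Tensor, last: Optional[torch.Tensor],
                alpha: float = 0.5) -> Tuple[torch.Tensor, torch.Tensor]:
    """Returns (pose to render, pose to remember for the next frame)."""
    if last is None:
        return current, current
    smoothed = current * alpha + last * (1.0 - alpha)
    return smoothed, smoothed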
class PuppeteerApp:
    def __init__(self, master, poser: Poser, face_detector, landmark_locator, video_capture,
                 torch_device: torch.device):
        self.master = master
        self.poser = poser
        self.face_detector = face_detector
        self.landmark_locator = landmark_locator
        self.video_capture = video_capture
        self.torch_device = torch_device
        self.head_pose_solver = HeadPoseSolver()
        # SubWindow module
        self.sub_root = SubWindow(master)
        self.sub_root_failure = None
        self.master.title("Puppeteer")
        self.master.protocol("WM_DELETE_WINDOW", self.on_closing)
        top_frame = Frame(self.master)
        top_frame.pack()
        if True:
            source_image_frame = Frame(top_frame, width=256, height=256)
            source_image_frame.pack_propagate(0)
            source_image_frame.pack(side=LEFT)
            self.source_image_label = Label(source_image_frame, text="Nothing yet!")
            self.source_image_label.pack(fill=BOTH, expand=True)
        if True:
            control_frame = Frame(top_frame, width=256, height=192)
            control_frame.pack_propagate(0)
            control_frame.pack(side=LEFT)
            self.video_capture_label = Label(control_frame, text="Nothing yet!")
            self.video_capture_label.pack(fill=BOTH, expand=True)
        if True:
            posed_image_frame = Frame(top_frame, width=256, height=256)
            posed_image_frame.pack_propagate(0)
            posed_image_frame.pack(side=LEFT, fill='y')
            self.posed_image_label = Label(posed_image_frame, text="Nothing yet!")
            self.posed_image_label.pack(fill=BOTH, expand=True)
        bottom_frame = Frame(self.master)
        bottom_frame.pack(fill='x')
        self.load_source_image_button = Button(bottom_frame, text="Load Image ...",
                                               relief=GROOVE, command=self.load_image)
        self.load_source_image_button.pack(fill='x')
        self.btn_child = Button(bottom_frame, text="Create sub window", relief=GROOVE,
                                command=self.sub_root.init)
        self.btn_child.pack(fill='x')
        self.pose_size = len(self.poser.pose_parameters())
        self.source_image = None
        self.posed_image = None
        self.current_pose = None
        self.last_pose = None
        # Pass the method itself so Tk schedules it, rather than calling it here.
        self.master.after(1000 // 60, self.update_image)

    def load_image(self):
        file_name = filedialog.askopenfilename(filetypes=[("PNG", '*.png')],
                                               initialdir="data/illust")
        if len(file_name) > 0:
            self.load_image_from_file(file_name)

    def load_image_from_file(self, file_name):
        image = PhotoImage(file=file_name)
        if image.width() != self.poser.image_size() or image.height() != self.poser.image_size():
            message = "The loaded image has size %dx%d, but we require %dx%d." \
                      % (image.width(), image.height(),
                         self.poser.image_size(), self.poser.image_size())
            messagebox.showerror("Wrong image size!", message)
        self.source_image_label.configure(image=image, text="")
        self.source_image_label.image = image
        self.source_image_label.pack()
        self.source_image = extract_pytorch_image_from_filelike(file_name).to(
            self.torch_device).unsqueeze(dim=0)

    def update_image(self):
        there_is_frame, frame = self.video_capture.read()
        if not there_is_frame:
            return
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.face_detector(rgb_frame)
        euler_angles = None
        face_landmarks = None
        if len(faces) > 0:
            face_rect = faces[0]
            face_landmarks = self.landmark_locator(rgb_frame, face_rect)
            face_box_points, euler_angles = self.head_pose_solver.solve_head_pose(
                face_landmarks)
            self.draw_face_landmarks(rgb_frame, face_landmarks)
            self.draw_face_box(rgb_frame, face_box_points)
        resized_frame = cv2.flip(cv2.resize(rgb_frame, (192, 256)), 1)
        pil_image = PIL.Image.fromarray(resized_frame, mode='RGB')
        photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
        self.video_capture_label.configure(image=photo_image, text="")
        self.video_capture_label.image = photo_image
        self.video_capture_label.pack()
        if euler_angles is not None and self.source_image is not None:
            self.current_pose = torch.zeros(self.pose_size, device=self.torch_device)
            self.current_pose[0] = max(min(-euler_angles.item(0) / 15.0, 1.0), -1.0)
            self.current_pose[1] = max(min(-euler_angles.item(1) / 15.0, 1.0), -1.0)
            self.current_pose[2] = max(min(euler_angles.item(2) / 15.0, 1.0), -1.0)
            if self.last_pose is None:
                self.last_pose = self.current_pose
            else:
                self.current_pose = self.current_pose * 0.5 + self.last_pose * 0.5
                self.last_pose = self.current_pose
            eye_min_ratio = 0.15
            eye_max_ratio = 0.25
            left_eye_normalized_ratio = compute_left_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[3] = 1 - left_eye_normalized_ratio
            right_eye_normalized_ratio = compute_right_eye_normalized_ratio(
                face_landmarks, eye_min_ratio, eye_max_ratio)
            self.current_pose[4] = 1 - right_eye_normalized_ratio
            min_mouth_ratio = 0.02
            max_mouth_ratio = 0.3
            mouth_normalized_ratio = compute_mouth_normalized_ratio(
                face_landmarks, min_mouth_ratio, max_mouth_ratio)
            self.current_pose[5] = mouth_normalized_ratio
            self.current_pose = self.current_pose.unsqueeze(dim=0)
            posed_image = self.poser.pose(self.source_image,
                                          self.current_pose).detach().cpu()
            numpy_image = rgba_to_numpy_image(posed_image[0])
            pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)), mode='RGBA')
            photo_image = PIL.ImageTk.PhotoImage(image=pil_image)
            self.posed_image_label.configure(image=photo_image, text="")
            self.posed_image_label.image = photo_image
            self.posed_image_label.pack()
            # send image to subwindow
            self.sub_root.change_image(photo_image)
        self.master.after(1000 // 60, self.update_image)

    def on_closing(self):
        self.video_capture.release()
        self.master.destroy()

    def draw_face_box(self, frame, face_box_points):
        line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
                      [4, 5], [5, 6], [6, 7], [7, 4],
                      [0, 4], [1, 5], [2, 6], [3, 7]]
        for start, end in line_pairs:
            cv2.line(frame, face_box_points[start], face_box_points[end],
                     (255, 0, 0), thickness=2)

    def draw_face_landmarks(self, frame, face_landmarks):
        for i in range(68):
            part = face_landmarks.part(i)
            x = part.x
            y = part.y
            cv2.rectangle(frame, (x - 1, y - 1), (x + 1, y + 1), (0, 255, 0), thickness=2)
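# The class above depends on a SubWindow helper that is not shown in this
# section. A minimal sketch of the interface its call sites imply (init() to
# open the window, change_image() to push each posed frame); the real
# implementation may differ.
from tkinter import Toplevel, Label, BOTH

class SubWindow:
    def __init__(self, master):
        self.master = master
        self.label = None

    def init(self):
        window = Toplevel(self.master)
        window.title("Puppeteer (sub window)")
        self.label = Label(window, text="Nothing yet!")
        self.label.pack(fill=BOTH, expand=True)

    def change_image(self, photo_image):
        if self.label is None:
            return  # sub window has not been created yet
        self.label.configure(image=photo_image, text="")
        self.label.image = photo_image  # keep a reference so Tk doesn't drop it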
class VideoPuppeteerApp:
    def __init__(self, character_path: str, video_path: str, poser: Poser,
                 face_detector, landmark_locator, torch_device: torch.device):
        self.poser = poser
        self.face_detector = face_detector
        self.landmark_locator = landmark_locator
        self.torch_device = torch_device
        self.head_pose_solver = HeadPoseSolver()
        self.pose_size = len(self.poser.pose_parameters())
        self.source_image = None
        self.posed_image = None
        self.current_pose = None
        self.last_pose = None
        self.anime_video_frames = []
        print(video_path)
        self.load_image(character_path)
        vidcap = cv2.VideoCapture(video_path)
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        success, image = vidcap.read()
        parent_dir, video_file_name = os.path.split(video_path)
        video_file_name = os.path.splitext(video_file_name)[0]
        # Sibling .txt marker next to the video; os.path.join supplies the
        # separator that plain string concatenation would miss.
        marker_path = os.path.join(parent_dir, video_file_name + ".txt")
        if not os.path.isfile(marker_path):
            exit()
        _, character_file_name = os.path.split(character_path)
        character_file_name = os.path.splitext(character_file_name)[0]
        count = 0
        while success:
            try:
                self.create_frame(image)
            except Exception:
                os.remove(marker_path)
                os.remove(video_path)
                exit()
            success, image = vidcap.read()
            count += 1
        file_names = []
        for i, frame in enumerate(self.anime_video_frames):
            im = Image.fromarray(self.crop_face(np.array(frame))).resize((100, 100))
            filename = "results/frame" + str(i) + ".png"
            file_names.append(filename)
            im.save(filename)
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(file_names, fps=fps)
        clip = clip.set_audio(VideoFileClip(video_path).audio)
        # Remove the marker and the source video before re-encoding to the same path.
        os.remove(marker_path)
        os.remove(video_path)
        clip.write_videofile(video_path,
                             temp_audiofile='results/temp-audio.m4a',
                             remove_temp=True,
                             codec='libx264',
                             audio_codec="aac")

    def load_image(self, file_name: str):
        if len(file_name) > 0:
            self.load_image_from_file(file_name)

    def load_image_from_file(self, file_name):
        self.source_image = extract_pytorch_image_from_filelike(file_name).to(
            self.torch_device).unsqueeze(dim=0)

    def create_frame(self, frame):
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = self.face_detector(rgb_frame)
        euler_angles = None
        face_landmarks = None
        if len(faces) > 0:
            face_rect = faces[0]
            face_landmarks = self.landmark_locator(rgb_frame, face_rect)
            face_box_points, euler_angles = self.head_pose_solver.solve_head_pose(
                face_landmarks)
            self.draw_face_landmarks(rgb_frame, face_landmarks)
            self.draw_face_box(rgb_frame, face_box_points)
        resized_frame = cv2.flip(cv2.resize(rgb_frame, (192, 256)), 1)
        pil_image = PIL.Image.fromarray(resized_frame, mode='RGB')
        if euler_angles is not None and self.source_image is not None:
            self.current_pose = torch.zeros(self.pose_size, device=self.torch_device)
            self.current_pose[0] = max(min(-euler_angles.item(0) / 15.0, 1.0), -1.0)
            self.current_pose[1] = max(min(-euler_angles.item(1) / 15.0, 1.0), -1.0)
            self.current_pose[2] = max(min(euler_angles.item(2) / 15.0, 1.0), -1.0)
            if self.last_pose is None:
                self.last_pose = self.current_pose
            else:
                self.current_pose = self.current_pose * 0.5 + self.last_pose * 0.5
                self.last_pose = self.current_pose
            min_mouth_ratio = 0.02
            max_mouth_ratio = 0.3
            mouth_normalized_ratio = compute_mouth_normalized_ratio(
                face_landmarks, min_mouth_ratio, max_mouth_ratio)
            self.current_pose[5] = mouth_normalized_ratio
            self.current_pose = self.current_pose.unsqueeze(dim=0)
            posed_image = self.poser.pose(self.source_image,
                                          self.current_pose).detach().cpu()
            numpy_image = rgba_to_numpy_image(posed_image[0])
            pil_image = PIL.Image.fromarray(np.uint8(np.rint(numpy_image * 255.0)), mode='RGBA')
            photo_image = np.array(pil_image)
            self.anime_video_frames.append(photo_image)

    def draw_face_box(self, frame, face_box_points):
        line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
                      [4, 5], [5, 6], [6, 7], [7, 4],
                      [0, 4], [1, 5], [2, 6], [3, 7]]
        for start, end in line_pairs:
            cv2.line(frame, face_box_points[start], face_box_points[end],
                     (255, 0, 0), thickness=2)

    def draw_face_landmarks(self, frame, face_landmarks):
        for i in range(68):
            part = face_landmarks.part(i)
            x = part.x
            y = part.y
            cv2.rectangle(frame, (x - 1, y - 1), (x + 1, y + 1), (0, 255, 0), thickness=2)

    def crop_face(self, frame, cascade_file="lbpcascade_animeface.xml"):
        cascade_file = pathlib.Path(__file__).parent.absolute() / cascade_file
        if not cascade_file.is_file():
            raise RuntimeError("%s: not found" % str(cascade_file))
        frame = np.array(frame)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)
        cascade = cv2.CascadeClassifier(str(cascade_file))
        faces = cascade.detectMultiScale(gray,
                                         # detector options
                                         scaleFactor=1.1,
                                         minNeighbors=5,
                                         minSize=(24, 24))
        (x, y, w, h) = faces[0]
        return frame[y:y + h, x:x + w]
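# Usage sketch for VideoPuppeteerApp: the constructor runs the whole pipeline
# (read the video, pose each frame, crop faces, re-encode with the original
# audio), so instantiating it is all there is to do. The function name and
# `cuda` device global are assumptions consistent with the rest of this section.
def animate_video(character_path, video_path, poser, face_detector, landmark_locator):
    os.makedirs("results", exist_ok=True)  # frame PNGs and the temp audio land here
    VideoPuppeteerApp(character_path, video_path, poser,
                      face_detector, landmark_locator, cuda)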