def _export_landmark_images(fa, src_dir, dst_dir, canvas_shape=(300, 300, 3)):
    """Render a landmark image for every ``.png`` in ``src_dir`` into ``dst_dir``.

    :param fa: Landmark detector exposing ``get_landmarks_from_image``.
    :param src_dir: Directory that is scanned with ``file_list(src_dir, ".png")``.
    :param dst_dir: Directory the rendered landmark images are saved to.
    :param canvas_shape: Shape handed to ``plot_landmarks`` for the output canvas.
    """
    for image_path in file_list(src_dir, ".png"):
        detections = fa.get_landmarks_from_image(image_path)
        # The detector yields nothing usable when no face is found; skip the
        # image instead of crashing the whole run on ``None[0]``.
        if detections is None or len(detections) == 0:
            print("No face detected, skipping: " + str(image_path))
            continue
        img = plot_landmarks(canvas_shape, detections[0])
        # Output name is "lm" + whatever follows the last "y" in the path
        # (presumably inputs are named like "noisy<id>.png" - TODO confirm).
        img.save(os.path.join(dst_dir, "lm" + image_path.split("y")[-1]))


def landmark_preprocess(noisy_path, dataset_path):
    """Create landmark images for the train/test/validation splits.

    For each split, every ``.png`` under ``noisy_path/<split>`` is run through
    the landmark detector, the first detected face is drawn with
    ``plot_landmarks`` on a 300x300x3 canvas, and the result is saved under
    ``dataset_path/<split>``. Images without a detected face are skipped.

    :param noisy_path: Root directory containing ``train``, ``test`` and
        ``validation`` sub-directories with the input images.
    :param dataset_path: Root directory that receives the landmark images.
    """
    split_names = ("train", "test", "validation")

    checkpath(dataset_path)
    for split in split_names:
        checkpath(os.path.join(dataset_path, split))

    fa = FaceAlignment(LandmarksType._2D, device="cuda:1")

    for split in split_names:
        noisy_split_path = os.path.join(noisy_path, split)
        if split == "validation":
            # Kept from the original implementation: progress marker.
            print(noisy_split_path)
        _export_landmark_images(fa, noisy_split_path,
                                os.path.join(dataset_path, split))
def compare(root, f1, f2):
    """Compare the faces found in two image files located under ``root``.

    The detector, recognizer and aligner are module-level singletons that are
    created on first use with the module-level ``gpu_id``.

    :param root: Directory prefix; joined to the file names with ``'/'``.
    :param f1: File name of the first image.
    :param f2: File name of the second image.
    :return: ``(similarity, elapsed_ms)``. ``similarity`` stays ``0`` unless
        both detections return a truthy bounding-box result; ``elapsed_ms`` is
        the wall-clock time of the call in whole milliseconds.
    """
    global face_det, face_recon, face_align

    # Lazily construct the shared pipeline stages.
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)

    started = time.time()

    first_image = cv2.imread(root + '/' + f1)
    second_image = cv2.imread(root + '/' + f2)
    first_boxes, first_points = face_det.get_max_bounding_box_by_image(first_image)
    second_boxes, second_points = face_det.get_max_bounding_box_by_image(second_image)

    similarity = 0
    if first_boxes and second_boxes:
        first_aligned = face_align.affine_face(first_image, first_points)
        second_aligned = face_align.affine_face(second_image, second_points)
        similarity = face_recon.face_compare(first_aligned, second_aligned)

    elapsed_ms = int(1000 * (time.time() - started))
    return similarity, elapsed_ms
def __init__(self, style_img, input_img, style_mask, input_mask, save=False):
    """Load the style/input images and masks and fetch their facial landmarks.

    Landmarks are cached in ``input/<style>_<input>_lm.pkl``; when that file
    exists it is loaded, otherwise landmarks are detected on the CPU and the
    cache file is written.

    :param style_img: Path to the style image.
    :param input_img: Path to the input image.
    :param style_mask: Path to the style mask image.
    :param input_mask: Path to the input mask image.
    :param save: Stored as ``self.save``.
    """
    # Names are the base file names up to the first dot.
    style_name = os.path.basename(style_img).split('.')[0]
    input_name = os.path.basename(input_img).split('.')[0]

    self.style_img = np.float32(imread(style_img))
    self.input_img = np.float32(imread(input_img))
    self.style_mask = np.float32(imread(style_mask))
    self.input_mask = np.float32(imread(input_mask))

    # Fetch facial landmarks, preferring the on-disk cache.
    cache_path = 'input/%s_%s_lm.pkl' % (style_name, input_name)
    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code; this is only
        # acceptable because the cache file is produced by this method.
        with open(cache_path, 'rb') as f:
            cached = pickle.load(f)
        self.style_lm = cached['style']
        self.input_lm = cached['input']
    else:
        fa = FaceAlignment(LandmarksType._2D, device='cpu', flip_input=False)
        # Only the first detected face of each image is used.
        self.style_lm = fa.get_landmarks(self.style_img)[0]
        self.input_lm = fa.get_landmarks(self.input_img)[0]
        with open(cache_path, 'wb') as f:
            pickle.dump({
                'style': self.style_lm,
                'input': self.input_lm
            }, f, protocol=2)

    # Join an ordered tuple, not a set: a set has no stable iteration order
    # for strings and collapses to a single element when both names match.
    self.output_filename = '_'.join((input_name, style_name))
    self.save = save
def encode_filter(filter_files):
    """Yield one dlib face descriptor per filter image that contains a face.

    All images are first loaded and passed through the landmark detector;
    afterwards each set of landmarks is converted to a dlib
    ``full_object_detection`` and encoded. Images without a face are reported
    and skipped; for images with several faces a warning is printed and the
    first face is used.

    :param filter_files: Indexable collection of image paths.
    :return: Generator of ``numpy`` arrays holding the face descriptors.
    """
    aligner = FaceAlignment(LandmarksType._2D,
                            enable_cuda=True,
                            flip_input=False)
    images = []
    faces = []
    for filter_file in filter_files:
        image = skimage.io.imread(str(filter_file))
        images.append(image)
        faces.append(aligner.get_landmarks(image))
    # Drop the reference to the landmark model before the encoder is loaded.
    aligner = None

    model_location = face_recognition_models.face_recognition_model_location()
    face_encoder = dlib.face_recognition_model_v1(model_location)

    for index, (image, face) in enumerate(zip(images, faces)):
        if face is None:
            print('Warning: {} has no face.'.format(filter_files[index]))
            continue
        if len(face) > 1:
            print('Warning: {} has more than one face.'.format(
                filter_files[index]))

        parts = [dlib.point(p[0], p[1]) for p in face[0]]
        # NOTE(review): `rect` is not defined inside this function; presumably
        # it is a module-level name - confirm against the rest of the file.
        raw_landmark_set = dlib.full_object_detection(rect, parts)
        yield numpy.array(
            face_encoder.compute_face_descriptor(image, raw_landmark_set, 1))
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False, frame_rate=1):
    """
    Run the pre-processing of the VoxCeleb dataset used for the Talking Heads models.

    The overall process consists of these steps:

    * Extract all frames of each video in the dataset. Frames of videos that are split in several files are joined
    together.
    * Select K+1 frames of each video that will be kept. K frames are used to train the embedder network, the
    remaining one is used to train the generator network. K is configured in the config.py file.
    * Extract landmarks for the face in each of the frames that are kept.
    * Save the frames and the corresponding landmarks of each video in a file (one per video) in the output
    directory.

    Videos are processed sequentially: processing several videos at once with multiprocessing was tried and turned
    out to slow the process down instead of speeding it up.

    :param source: Path to the raw VoxCeleb dataset.
    :param output: Path where the pre-processed videos will be stored.
    :param device: Device used to run the landmark extraction model.
    :param size: Size of the dataset to generate. If 0, the entire raw dataset will be processed, otherwise, as many
    videos will be processed as specified by this parameter.
    :param overwrite: If True, files that have already been processed will be overwritten, otherwise, they will be
    ignored and instead, different files will be loaded.
    :param frame_rate: Forwarded unchanged to ``process_video_folder`` for every video.
    """
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {config.K}).')

    landmark_model = FaceAlignment(LandmarksType._2D, device=device)
    video_list = get_video_list(source, size, output, overwrite=overwrite)
    logging.info(f'Processing {len(video_list)} videos...')

    # Single-process run: the shared state is initialised once, in this process.
    init_pool(landmark_model, output)

    for position, video in enumerate(video_list, start=1):
        began = datetime.now()
        process_video_folder(video, frame_rate)
        logging.info(
            f'{position}/{len(video_list)}\t{datetime.now()-began}')

    logging.info(f'All {len(video_list)} videos processed.')
def __getitem__(self, idx):
    """Return the image at position ``idx`` together with its landmark plot.

    The landmark model is created on first use and then reused for every
    later item, instead of being rebuilt on each access.

    :param idx: Position into ``self.indexes``.
    :return: ``(real_idx, out)`` where ``out`` is a one-element list holding a
        dict with the keys ``'frame'`` (the RGB image) and ``'landmarks'``
        (the result of ``plot_landmarks``), both passed through
        ``self.transform`` when it is set.
    """
    real_idx = self.indexes[idx]
    path = self.files[real_idx]
    print("image file path=", path)

    # Building a FaceAlignment instance is expensive; cache it on the dataset.
    fa = getattr(self, '_face_alignment', None)
    if fa is None:
        fa = FaceAlignment(LandmarksType._2D, device=self.device)
        self._face_alignment = fa

    image_bgr = cv2.imread(path)
    x_temp = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    # Only the first detected face is used.
    y_temp = fa.get_landmarks(x_temp)[0]

    x = PIL.Image.fromarray(x_temp, 'RGB')
    y = plot_landmarks(x_temp, y_temp)

    if self.transform:
        x = self.transform(x)
        y = self.transform(y)

    out = [{'frame': x, 'landmarks': y}]
    return real_idx, out
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False):
    """
    Pre-process the videos returned by ``get_video_list`` one after another.

    A landmark model is created for ``device``, handed to ``init_pool``
    together with ``output``, and ``process_video_folder`` is then called for
    every video. Progress is reported through ``logging``.

    :param source: Forwarded to ``get_video_list`` as the dataset location.
    :param output: Forwarded to ``get_video_list`` and ``init_pool``.
    :param device: Device used to run the landmark extraction model.
    :param size: Forwarded to ``get_video_list``.
    :param overwrite: Forwarded to ``get_video_list``.
    """
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {K}).')

    landmark_model = FaceAlignment(LandmarksType._2D, device=device)
    video_list = get_video_list(source, size, output, overwrite=overwrite)
    logging.info(f'Processing {len(video_list)} videos...')

    init_pool(landmark_model, output)

    for position, video in enumerate(video_list, start=1):
        process_video_folder(video)
        logging.info(f'{position}/{len(video_list)}')

    logging.info(f'All {len(video_list)} videos processed.')
def evaluate(respth='./results/data_src', dspth='../data'):
    """Write a masked ``.png`` for every readable image in ``dspth``.

    For each entry of the data directory the image is read, converted from
    BGR to RGB, landmarks are detected, a hull mask is built from the last
    detected face and the result of ``merge(image, mask)`` is written to the
    results directory with a ``.png`` extension. Entries that cannot be read
    as an image, or in which no face is detected, are reported and skipped.

    :param respth: Results directory, relative to this file's directory.
    :param dspth: Data directory, relative to this file's directory.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    respth = osp.join(base_dir, respth)
    os.makedirs(respth, exist_ok=True)

    face_model = FaceAlignment(LandmarksType._2D, device="cuda")
    data_path = osp.join(base_dir, dspth)

    for image_name in os.listdir(data_path):
        image = cv2.imread(osp.join(data_path, image_name))
        # cv2.imread returns None for directories and non-image files.
        if image is None:
            print('Skipping unreadable file: ' + image_name)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        landmarks = face_model.get_landmarks(image)
        if landmarks is None or len(landmarks) == 0:
            print('No face detected, skipping: ' + image_name)
            continue
        landmark = landmarks[-1]

        mask = get_image_hull_mask(np.shape(image), landmark).astype(np.uint8)
        # NOTE(review): `image` is RGB at this point while cv2.imwrite expects
        # BGR ordering - confirm that `merge` accounts for this.
        image_bgra = merge(image, mask)

        # Strip the real extension instead of assuming it is 3 characters.
        stem, _ = osp.splitext(image_name)
        cv2.imwrite(osp.join(respth, stem + '.png'), image_bgra)
def getTransform(videoName, modelIdx):
    """Generate a moving video for the selected model and report its token.

    Loads the generator and embedding vector of the model at ``modelIdx``,
    runs ``generate_moving_video`` on ``app/static/<videoName>`` and writes
    the result to ``app/static/result-<timestamp>.mp4``.

    :param videoName: File name of the source video under ``app/static/``.
    :param modelIdx: Index into the list of available model names.
    :return: JSON response with ``code``, ``message`` and the timestamp
        ``token`` that identifies the result file.
    """
    model_names = [
        'sw', 'han', 'tsai', 'father', 'cloud', 'aerith', 'tifa', 'davinci'
    ]

    G = network.Generator()
    model_name = model_names[modelIdx]
    resource_dir = "app/modules/talkingHeads/resource/" + model_name
    G = load_model(G, resource_dir, model_name)
    G = G.to("cuda:0")

    fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    e_vector = get_e_vector(resource_dir + "/" + model_name + ".npy")

    # The timestamp (whole seconds) doubles as the result token.
    timestamp = str(int(time.time()))
    print(timestamp)

    source_video = "app/static/" + videoName
    result_video = "app/static/result-" + timestamp + ".mp4"
    generate_moving_video(G, source_video, e_vector, result_video, "cuda:0",
                          fa)

    return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp})
def eval(input_path, output_path, checkpoint_path, model, gpu):
    """Run a single image through the network and save the result.

    The image and its landmark plot are resized to the largest multiples of
    128 that fit inside the original size, concatenated along the channel
    axis and fed to the network; the output is resized back to the original
    size and saved.

    :param input_path: Path of the image to process.
    :param output_path: Path the output image is saved to.
    :param checkpoint_path: Checkpoint file; its ``'my_classifier'`` entry is
        loaded as the network's state dict.
    :param model: ``"Pix2Pix"`` selects ``Pix2Pix``; anything else selects
        ``ResResNet``.
    :param gpu: CUDA device index used for landmark detection and inference.
    :raises ValueError: If the image is smaller than 128 pixels in either
        dimension, so no valid working size exists.
    """
    image = Image.open(input_path).convert("RGB")

    # PIL reports (width, height); torchvision sizes are (height, width).
    w, h = image.size
    w_, h_ = 128 * (w // 128), 128 * (h // 128)
    if w_ == 0 or h_ == 0:
        raise ValueError(
            "Input image must be at least 128x128 pixels, got %dx%d" % (w, h))

    fa = FaceAlignment(LandmarksType._2D, device="cuda:" + str(gpu))
    # Only the first detected face is used.
    landmarks = fa.get_landmarks_from_image(input_path)[0]
    landmark_img = plot_landmarks(np.array(image), landmarks)

    transform_forward = transforms.Compose([
        transforms.Resize((h_, w_)),
        transforms.CenterCrop((h_, w_)),
        transforms.ToTensor()
    ])
    transform_backward = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((h, w)),
        transforms.CenterCrop((h, w)),
    ])

    image_tensor = transform_forward(image)
    landmark_tensor = transform_forward(landmark_img)

    if model == "Pix2Pix":
        NFNet = Pix2Pix()
    else:
        NFNet = ResResNet()

    # Load onto the CPU first so the checkpoint does not depend on the device
    # it was saved from; the model is moved to the target GPU afterwards.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    NFNet.load_state_dict(checkpoint['my_classifier'])
    NFNet.to(gpu)

    x = torch.cat((image_tensor, landmark_tensor), 0)
    x = x.unsqueeze(0)
    x = x.to(gpu)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = NFNet(x)
    output = output.to("cpu")

    result = transform_backward(output[0])
    result.save(output_path)
def __init__(self, args):
    """Load the embedding model, the MTCNN detector and the landmark model.

    :param args: Namespace providing ``threshold``, ``image_size`` (a
        ``"<int>,<int>"`` string), ``model`` (a ``"<prefix>,<epoch>"``
        string) and ``gpu`` (device index for ``mx.gpu``).
    :raises AssertionError: If ``image_size`` or ``model`` does not consist of
        exactly two comma-separated fields.
    """
    self.args = args
    self.threshold = args.threshold
    self.det_minsize = 50
    self.det_threshold = [0.4, 0.6, 0.6]
    self.det_factor = 0.9

    size_fields = args.image_size.split(',')
    assert len(size_fields) == 2, 'image_size must have two comma-separated fields'
    image_size = (int(size_fields[0]), int(size_fields[1]))
    self.image_size = image_size

    model_fields = args.model.split(',')
    assert len(model_fields) == 2, 'model must be given as "<prefix>,<epoch>"'
    prefix = model_fields[0]
    epoch = int(model_fields[1])
    print('loading', prefix, epoch)

    ctx = mx.gpu(args.gpu)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Cut the network at the 'fc1_output' layer and use that as the output.
    all_layers = sym.get_internals()
    sym = all_layers['fc1_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    # Bound for a batch size of 1.
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    self.model = model

    mtcnn_path = os.path.join('deploy', 'mtcnn-model')
    # NOTE(review): the detector gets its own thresholds here rather than
    # self.det_threshold - confirm this is intentional.
    detector = MtcnnDetector(model_folder=mtcnn_path,
                             ctx=ctx,
                             num_worker=1,
                             accurate_landmark=True,
                             threshold=[0.0, 0.0, 0.2])
    self.detector = detector

    self.FACE_ALIGNMENT = FaceAlignment(LandmarksType._3D,
                                        device='cuda',
                                        flip_input=False)
def monkey_patch_face_detector(_): detector = dlib.get_frontal_face_detector() class Rect(object): def __init__(self, rect): self.rect = rect def detect(*args): return [Rect(x) for x in detector(*args)] return detect dlib.cnn_face_detection_model_v1 = monkey_patch_face_detector FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D, enable_cuda=True, flip_input=False) mean_face_x = numpy.array([ 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364, 0.490127, 0.42689 ]) mean_face_y = numpy.array([ 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
from PIL import Image import regex import torch import subprocess import hashlib import sys from demo import load_checkpoints from animate import normalize_kp app = Flask(__name__) generator, kp_detector = load_checkpoints( config_path="first-order-model/config/vox-adv-256.yaml", checkpoint_path="vox-adv-cpk.pth.tar", ) fa = FaceAlignment(LandmarksType._2D) @app.route("/") def index(): return render_template("upload.html") def data(obj) -> str: return f"data: {json.dumps(obj)}\n\n" @app.route("/upload", methods=["POST"]) def upload(): for key, file in request.files.items(): pathlib.Path("static", key).mkdir(exist_ok=True)
def __init__(self, dimensions='2d'):
    """Create the CPU landmark model for 2D or 3D landmarks.

    :param dimensions: ``'2d'`` selects 2D landmarks; any other value selects
        3D landmarks.
    """
    if dimensions == '2d':
        landmarkType = LandmarksType._2D
    else:
        landmarkType = LandmarksType._3D

    self.faceAlignment = FaceAlignment(
        landmarkType,
        flip_input=False,
        device='cpu',
        verbose=False,
    )
import cv2 import random import numpy as np import pickle as pkl import tensorflow as tf from tqdm import tqdm import pandas as pd from functools import partial import multiprocessing as mp from imutils import face_utils from hyperparams import Hyperparams as hp from face_alignment import FaceAlignment, LandmarksType from utils import detector, predictor, preprocess_input global face_alignment face_alignment = FaceAlignment(LandmarksType._2D, device='cuda') def get_video_list(source = hp.dataset): """ Extracts a list of paths to videos to pre-process during the current run. :param source: Path to the root directory of the dataset. :return: List of paths to videos. """ video_list = [] for root, dirs, files in tqdm(os.walk(source)): if len(files) > 0: assert contains_only_videos(files) and len(dirs) == 0 video_list.append((root, files))
e_vector = get_e_vector("app/modules/talkingHeads/resource/" + modelList[modelIdx] + "/" + modelList[modelIdx] + ".npy") timestamp = str(int(time.time())) print(timestamp) # generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0") generate_moving_video(G, "app/static/" + videoName, e_vector, "app/static/result-" + timestamp + ".mp4", "cuda:0", fa) return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp}) G = network.Generator() G = load_model(G, "app/modules/talkingHeads/resource/han", "han") G = G.to("cuda:0") fa = FaceAlignment(LandmarksType._2D, device='cuda:0') e_vector = get_e_vector("app/modules/talkingHeads/resource/han/han.npy") def imgTransform(srcImage, modelIdx): image = base64_cv2(srcImage) # image = cv2.imread("app/modules/2.png") image = cv2.resize(image, (256, 256)) # modelList=['sw','han','tsai'] # G = network.Generator() # G = load_model(G, "app/modules/talkingHeads/resource/"+modelList[modelIdx], modelList[modelIdx]) # G = G.to("cuda:0") # fa = FaceAlignment(LandmarksType._2D, device='cuda:0') # e_vector = get_e_vector("app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy") result = generate_moving_image(G, image, e_vector, "cuda:0", fa) print('don')