def landmark_preprocess(noisy_path, dataset_path):
    noisy_train_path = os.path.join(noisy_path, "train")
    noisy_test_path = os.path.join(noisy_path, "test")
    noisy_val_path = os.path.join(noisy_path, "validation")

    train_path = os.path.join(dataset_path, "train")
    test_path = os.path.join(dataset_path, "test")
    val_path = os.path.join(dataset_path, "validation")

    checkpath(dataset_path)
    checkpath(train_path)
    checkpath(test_path)
    checkpath(val_path)

    fa = FaceAlignment(LandmarksType._2D, device="cuda:1")

    # For each split, detect face landmarks on every noisy frame, rasterise
    # them with plot_landmarks, and save the result with an "lm" prefix.
    train_files = file_list(noisy_train_path, ".png")
    for i in train_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(train_path, "lm" + i.split("y")[-1]))

    test_files = file_list(noisy_test_path, ".png")
    for i in test_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(test_path, "lm" + i.split("y")[-1]))

    val_files = file_list(noisy_val_path, ".png")
    for i in val_files:
        landmarks = fa.get_landmarks_from_image(i)[0]
        img = plot_landmarks((300, 300, 3), landmarks)
        img.save(os.path.join(val_path, "lm" + i.split("y")[-1]))
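The helpers checkpath, file_list, and plot_landmarks are not shown anywhere in this example. A minimal sketch of what they might look like for this snippet alone (every implementation detail here is an assumption):

import os
import glob
import numpy as np
from PIL import Image, ImageDraw


def checkpath(path):
    """Create the directory if it does not exist yet."""
    os.makedirs(path, exist_ok=True)


def file_list(directory, extension):
    """Return a sorted list of files in `directory` ending with `extension`."""
    return sorted(glob.glob(os.path.join(directory, "*" + extension)))


def plot_landmarks(shape, landmarks):
    """Draw the detected landmarks as white dots on a black canvas of `shape`."""
    canvas = Image.fromarray(np.zeros(shape, dtype=np.uint8))
    draw = ImageDraw.Draw(canvas)
    for x, y in landmarks:
        draw.ellipse((x - 2, y - 2, x + 2, y + 2), fill=(255, 255, 255))
    return canvas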
Example #2
def compare(root, f1, f2):
    global face_det
    global face_recon
    global face_align
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)
    time_start = time.time()
    img_a = cv2.imread(root + '/' + f1)
    img_b = cv2.imread(root + '/' + f2)

    bbox_list1, a_point = face_det.get_max_bounding_box_by_image(img_a)
    bbox_list2, b_point = face_det.get_max_bounding_box_by_image(img_b)
    similarity = 0
    if bbox_list1 and bbox_list2:
        # Align both faces, then compute the similarity score.
        a_aligned_faces = face_align.affine_face(img_a, a_point)
        b_aligned_faces = face_align.affine_face(img_b, b_point)
        similarity = face_recon.face_compare(a_aligned_faces, b_aligned_faces)
    # Elapsed time in milliseconds, measured whether or not faces were found.
    time_end = time.time()
    time_use = int(1000 * (time_end - time_start))
    return similarity, time_use
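compare() relies on module-level globals that are initialised lazily on the first call. A hypothetical setup for that state (the gpu_id value and the idea that all three start as None are assumptions):

# Hypothetical module-level state assumed by compare(); the heavy models are
# only constructed on the first call rather than at import time.
gpu_id = 0
face_det = None
face_recon = None
face_align = None

# Example call (paths are assumptions):
# similarity, elapsed_ms = compare('/data/pairs', 'a.jpg', 'b.jpg')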
Example #3
    def __init__(self, style_img, input_img, style_mask, input_mask, save=False):
        style_name = os.path.basename(style_img).split('.')[0]
        input_name = os.path.basename(input_img).split('.')[0]

        self.style_img = np.float32(imread(style_img))
        self.input_img = np.float32(imread(input_img))

        self.style_mask = np.float32(imread(style_mask))
        self.input_mask = np.float32(imread(input_mask))

        # Fetch Facial Landmarks
        if os.path.exists('input/%s_%s_lm.pkl' % (style_name, input_name)):
            with open('input/%s_%s_lm.pkl' % (style_name, input_name), 'rb') as f:
                pkl = pickle.load(f)
                self.style_lm = pkl['style']
                self.input_lm = pkl['input']
        else:
            fa = FaceAlignment(LandmarksType._2D, device='cpu', flip_input=False)
            self.style_lm = fa.get_landmarks(self.style_img)[0]
            self.input_lm = fa.get_landmarks(self.input_img)[0]
            with open('input/%s_%s_lm.pkl' % (style_name, input_name),
                      'wb') as f:
                pickle.dump({
                    'style': self.style_lm,
                    'input': self.input_lm
                }, f, protocol=2)

        self.output_filename = '_'.join((input_name, style_name))
        self.save = save
Example #4
    def encode_filter(filter_files):
        images = []
        faces = []

        FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D,
                                       enable_cuda=True,
                                       flip_input=False)
        for i, filter_file in enumerate(filter_files):
            images.append(skimage.io.imread(str(filter_file)))
            faces.append(FACE_ALIGNMENT.get_landmarks(images[i]))
        FACE_ALIGNMENT = None

        face_recognition_model = face_recognition_models.face_recognition_model_location(
        )
        face_encoder = dlib.face_recognition_model_v1(face_recognition_model)
        for i, face in enumerate(faces):
            if face is None:
                print('Warning: {} has no face.'.format(filter_files[i]))
                continue
            if len(face) > 1:
                print('Warning: {} has more than one face.'.format(
                    filter_files[i]))

            # Build a dlib full_object_detection from the predicted landmarks,
            # using the landmarks' bounding box as the face rectangle.
            parts = []
            for p in face[0]:
                parts.append(dlib.point(int(p[0]), int(p[1])))
            xs = [p.x for p in parts]
            ys = [p.y for p in parts]
            rect = dlib.rectangle(min(xs), min(ys), max(xs), max(ys))
            raw_landmark_set = dlib.full_object_detection(rect, parts)
            yield numpy.array(
                face_encoder.compute_face_descriptor(images[i],
                                                     raw_landmark_set, 1))
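encode_filter yields one 128-dimensional dlib face encoding per filter image. A hypothetical way to consume it, matching a candidate encoding against the references by Euclidean distance (the 0.6 threshold is an assumption borrowed from common dlib practice):

import numpy

# Hypothetical usage of encode_filter(); `filter_files` and the candidate
# encoding come from elsewhere and are assumptions here.
reference_encodings = list(encode_filter(filter_files))


def matches_filter(candidate_encoding, threshold=0.6):
    """True if the candidate is close to at least one reference encoding."""
    distances = [numpy.linalg.norm(ref - candidate_encoding)
                 for ref in reference_encodings]
    return bool(distances) and min(distances) <= threshold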
Example #5
def preprocess_dataset(source,
                       output,
                       device='cpu',
                       size=0,
                       overwrite=False,
                       frame_rate=1):
    """
    Starts the pre-processing of the VoxCeleb dataset used for the Talking Heads models. This process has the following
    steps:

    * Extract all frames of each video in the dataset. Frames of videos that are split in several files are joined
    together.
    * Select K+1 frames of each video that will be kept. K frames will be used to train the embedder network, while
    the other one will be used to train the generator network. The value of K can be configured in the config.py file.
    * Landmarks will be extracted for the face in each of the frames that are being kept.
    * The frames and the corresponding landmarks for each video will be saved in files (one for each video) in the
    output directory.

    We originally tried to process several videos simultaneously using multiprocessing, but this seems to actually
    slow down the process instead of speeding it up.


    :param source: Path to the raw VoxCeleb dataset.
    :param output: Path where the pre-processed videos will be stored.
    :param device: Device used to run the landmark extraction model.
    :param size: Size of the dataset to generate. If 0, the entire raw dataset will be processed, otherwise, as many
    videos will be processed as specified by this parameter.
    :param overwrite: If True, files that have already been processed will be overwritten; otherwise they will be
    skipped and different videos will be selected for processing instead.
    :param frame_rate: Frame rate used when extracting frames from the videos.
    """

    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {config.K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)

    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')
    # pool = Pool(processes=4, initializer=init_pool, initargs=(fa, output))
    # pool.map(process_video_folder, video_list)

    init_pool(fa, output)
    counter = 1
    for v in video_list:
        start_time = datetime.now()
        process_video_folder(v, frame_rate)
        logging.info(
            f'{counter}/{len(video_list)}\t{datetime.now()-start_time}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
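The K+1 frame selection described in the docstring happens inside process_video_folder, which is not shown here. A minimal sketch of that step, assuming frames is a list of decoded frames and config.K is defined as in the docstring:

import random


def select_frames(frames, k):
    """Pick K+1 distinct random frames: K for the embedder, one for the generator."""
    # random.sample raises ValueError if the video has fewer than k + 1 frames.
    return random.sample(frames, k + 1)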
Example #6
    def __getitem__(self, idx):
        real_idx = self.indexes[idx]
        path = self.files[real_idx]
        print("image file path=", path)
        fa = FaceAlignment(LandmarksType._2D, device=self.device)
        imgUMat = cv2.imread(path)
        x_temp = cv2.cvtColor(imgUMat, cv2.COLOR_BGR2RGB)
        y_temp = fa.get_landmarks(x_temp)[0]
        out = []
        x = PIL.Image.fromarray(x_temp, 'RGB')
        y = plot_landmarks(x_temp, y_temp)
        if self.transform:
            x = self.transform(x)
            y = self.transform(y)
        out.append({'frame': x, 'landmarks': y})

        return real_idx, out
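Note that this __getitem__ builds a new FaceAlignment model for every item, which is expensive. A minimal sketch of the same dataset with the model created once in __init__ (the class name, constructor arguments, and reuse of plot_landmarks are assumptions, not the author's code):

import cv2
import PIL.Image
from torch.utils.data import Dataset
from face_alignment import FaceAlignment, LandmarksType


class FrameLandmarkDataset(Dataset):
    """Sketch only: loads a frame and its rasterised landmarks per item."""

    def __init__(self, files, device='cpu', transform=None):
        self.files = files
        self.transform = transform
        self.fa = FaceAlignment(LandmarksType._2D, device=device)  # built once

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        frame = cv2.cvtColor(cv2.imread(self.files[idx]), cv2.COLOR_BGR2RGB)
        landmarks = self.fa.get_landmarks(frame)[0]
        x = PIL.Image.fromarray(frame, 'RGB')
        y = plot_landmarks(frame, landmarks)  # same helper as the snippet above
        if self.transform:
            x, y = self.transform(x), self.transform(y)
        return idx, [{'frame': x, 'landmarks': y}]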
Example #7
def preprocess_dataset(source, output, device='cpu', size=0, overwrite=False):
    logging.info('===== DATASET PRE-PROCESSING =====')
    logging.info(f'Running on {device.upper()}.')
    logging.info(f'Saving K+1 random frames from each video (K = {K}).')
    fa = FaceAlignment(LandmarksType._2D, device=device)

    video_list = get_video_list(source, size, output, overwrite=overwrite)

    logging.info(f'Processing {len(video_list)} videos...')

    init_pool(fa, output)
    counter = 1
    for v in video_list:
        process_video_folder(v)
        logging.info(f'{counter}/{len(video_list)}')
        counter += 1

    logging.info(f'All {len(video_list)} videos processed.')
Example #8
def evaluate(respth='./results/data_src', dspth='../data'):
    respth = osp.join(os.path.abspath(os.path.dirname(__file__)), respth)
    if not os.path.exists(respth):
        os.makedirs(respth)

    face_model = FaceAlignment(LandmarksType._2D, device="cuda")
    data_path = osp.join(os.path.abspath(os.path.dirname(__file__)), dspth)
    for image_path in os.listdir(data_path):
        image = cv2.imread(osp.join(data_path, image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        landmark = face_model.get_landmarks(image)[-1]
        # print(landmark)
        mask = get_image_hull_mask(np.shape(image), landmark).astype(np.uint8)
        # cv2.imshow("mask", (mask*255).astype(np.uint8))

        image_bgra = merge(image, mask)
        # cv2.imshow("image_bgra", image_bgra)
        # cv2.waitKey(1)
        save_path = osp.join(respth, image_path)
        cv2.imwrite(save_path[:-4] + '.png', image_bgra)
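get_image_hull_mask and merge are helpers that are not shown. A hypothetical sketch of merge(), which attaches the hull mask as an alpha channel and converts the RGB frame back to BGR so cv2.imwrite stores the colours correctly:

import cv2
import numpy as np


def merge(image_rgb, mask):
    """Sketch only: combine an RGB frame and a 0/1 hull mask into a BGRA image."""
    bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
    alpha = (np.squeeze(mask) * 255).astype(np.uint8)
    return np.dstack((bgr, alpha))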
Example #9
def getTransform(videoName, modelIdx):
    modelList = [
        'sw', 'han', 'tsai', 'father', 'cloud', 'aerith', 'tifa', 'davinci'
    ]
    G = network.Generator()
    G = load_model(G,
                   "app/modules/talkingHeads/resource/" + modelList[modelIdx],
                   modelList[modelIdx])
    G = G.to("cuda:0")
    fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    e_vector = get_e_vector("app/modules/talkingHeads/resource/" +
                            modelList[modelIdx] + "/" + modelList[modelIdx] +
                            ".npy")
    timestamp = str(int(time.time()))
    print(timestamp)
    #     generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0")
    generate_moving_video(G, "app/static/" + videoName, e_vector,
                          "app/static/result-" + timestamp + ".mp4", "cuda:0",
                          fa)
    # "轉換成功" means "conversion successful".
    return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp})
Example #10
def eval(input_path, output_path, checkpoint_path, model, gpu):
    input = Image.open(input_path)
    input = input.convert("RGB")

    w, h = input.size
    w_, h_ = 128 * (w // 128), 128 * (h // 128)

    fa = FaceAlignment(LandmarksType._2D, device="cuda:" + str(gpu))
    landmarks = fa.get_landmarks_from_image(input_path)[0]
    landmark_img = plot_landmarks(np.array(input), landmarks)

    transform_forward = transforms.Compose([
        transforms.Resize((w_, h_)),
        transforms.CenterCrop((w_, h_)),
        transforms.ToTensor()
    ])
    transform_backward = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((w, h)),
        transforms.CenterCrop((w, h)),
    ])

    input = transform_forward(input)
    landmark_img = transform_forward(landmark_img)

    if model == "Pix2Pix":
        NFNet = Pix2Pix()
    else:
        NFNet = ResResNet()

    checkpoint = torch.load(checkpoint_path)
    NFNet.load_state_dict(checkpoint['my_classifier'])
    NFNet.to(gpu)

    x = torch.cat((input, landmark_img), 0)
    x = x.unsqueeze(0)
    x = x.to(gpu)
    output = NFNet(x)
    output = output.to("cpu")
    output = transform_backward(output[0])
    output.save(output_path)
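A hypothetical invocation of eval (every path, the checkpoint file, and the GPU index are assumptions):

# Hypothetical call; all argument values here are assumptions.
if __name__ == "__main__":
    eval("samples/noisy.png", "samples/denoised.png",
         "checkpoints/pix2pix.pth", model="Pix2Pix", gpu=0)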
Example #11
    def __init__(self, args):
        self.args = args
        model = edict()

        self.threshold = args.threshold
        self.det_minsize = 50
        self.det_threshold = [0.4, 0.6, 0.6]
        self.det_factor = 0.9
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.image_size = image_size
        _vec = args.model.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)
        ctx = mx.gpu(args.gpu)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        # mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        mtcnn_path = os.path.join('deploy', 'mtcnn-model')
        detector = MtcnnDetector(model_folder=mtcnn_path,
                                 ctx=ctx,
                                 num_worker=1,
                                 accurate_landmark=True,
                                 threshold=[0.0, 0.0, 0.2])
        self.detector = detector
        self.FACE_ALIGNMENT = FaceAlignment(LandmarksType._3D,
                                            device='cuda',
                                            flip_input=False)
Example #12
def monkey_patch_face_detector(_):
    detector = dlib.get_frontal_face_detector()

    class Rect(object):
        def __init__(self, rect):
            self.rect = rect

    def detect(*args):
        return [Rect(x) for x in detector(*args)]

    return detect


dlib.cnn_face_detection_model_v1 = monkey_patch_face_detector
FACE_ALIGNMENT = FaceAlignment(LandmarksType._2D,
                               enable_cuda=True,
                               flip_input=False)
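The monkey patch replaces dlib.cnn_face_detection_model_v1, which older face_alignment versions call internally, with dlib's HOG frontal detector wrapped to mimic the CNN detector's output objects. A hypothetical use of the patched instance (the image path is an assumption):

# Hypothetical usage of the patched aligner; "face.jpg" is an assumption.
import skimage.io

landmarks = FACE_ALIGNMENT.get_landmarks(skimage.io.imread("face.jpg"))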

# Normalised x-coordinates of a reference ("mean") face shape, used as a target
# for face alignment; the matching y-coordinates follow below.
mean_face_x = numpy.array([
    0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483,
    0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127,
    0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122,
    0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132,
    0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127,
    0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532,
    0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364,
    0.490127, 0.42689
])

mean_face_y = numpy.array([
    0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
Example #13
import json
import pathlib
import subprocess
import hashlib
import sys
import regex
import torch
from PIL import Image
from flask import Flask, render_template, request
from face_alignment import FaceAlignment, LandmarksType
from demo import load_checkpoints
from animate import normalize_kp

app = Flask(__name__)

generator, kp_detector = load_checkpoints(
    config_path="first-order-model/config/vox-adv-256.yaml",
    checkpoint_path="vox-adv-cpk.pth.tar",
)
fa = FaceAlignment(LandmarksType._2D)


@app.route("/")
def index():
    return render_template("upload.html")


def data(obj) -> str:
    return f"data: {json.dumps(obj)}\n\n"


@app.route("/upload", methods=["POST"])
def upload():
    for key, file in request.files.items():
        pathlib.Path("static", key).mkdir(exist_ok=True)
Example #14
    def __init__(self, dimensions='2d'):
        landmarkType = LandmarksType._2D if dimensions == '2d' else LandmarksType._3D
        self.faceAlignment = FaceAlignment(landmarkType,
                                           flip_input=False,
                                           device='cpu',
                                           verbose=False)
Example #15
import os
import cv2
import random
import numpy as np
import pickle as pkl
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
from functools import partial
import multiprocessing as mp
from imutils import face_utils
from hyperparams import Hyperparams as hp
from face_alignment import FaceAlignment, LandmarksType
from utils import detector, predictor, preprocess_input

global face_alignment
face_alignment = FaceAlignment(LandmarksType._2D, device='cuda')

def get_video_list(source = hp.dataset):
    """
    Extracts a list of paths to videos to pre-process during the current run.

    :param source: Path to the root directory of the dataset.
    :return: List of paths to videos.
    """
    video_list = []
    
    for root, dirs, files in tqdm(os.walk(source)):
        if len(files) > 0:
            # Leaf directories are expected to contain only video files.
            assert contains_only_videos(files) and len(dirs) == 0
            video_list.append((root, files))

    return video_list
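contains_only_videos is not shown. A hypothetical sketch (the extension list is an assumption):

# Hypothetical sketch of the contains_only_videos() helper assumed above.
def contains_only_videos(files, extensions=('.mp4', '.avi', '.webm')):
    """True if every file in `files` has a video extension."""
    return all(os.path.splitext(f)[1].lower() in extensions for f in files)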
Example #16
    e_vector = get_e_vector("app/modules/talkingHeads/resource/" +
                            modelList[modelIdx] + "/" + modelList[modelIdx] +
                            ".npy")
    timestamp = str(int(time.time()))
    print(timestamp)
    #     generate_moving_video(G, "app/static/"+videoName, "app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy", "app/static/result-"+timestamp+".mp4", "cuda:0")
    generate_moving_video(G, "app/static/" + videoName, e_vector,
                          "app/static/result-" + timestamp + ".mp4", "cuda:0",
                          fa)
    # "轉換成功" means "conversion successful".
    return jsonify({"code": 200, "message": "轉換成功", 'token': timestamp})


G = network.Generator()
G = load_model(G, "app/modules/talkingHeads/resource/han", "han")
G = G.to("cuda:0")
fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
e_vector = get_e_vector("app/modules/talkingHeads/resource/han/han.npy")


def imgTransform(srcImage, modelIdx):
    image = base64_cv2(srcImage)
    #     image = cv2.imread("app/modules/2.png")
    image = cv2.resize(image, (256, 256))
    #     modelList=['sw','han','tsai']
    #     G = network.Generator()
    #     G = load_model(G, "app/modules/talkingHeads/resource/"+modelList[modelIdx], modelList[modelIdx])
    #     G = G.to("cuda:0")
    #     fa = FaceAlignment(LandmarksType._2D, device='cuda:0')
    #     e_vector = get_e_vector("app/modules/talkingHeads/resource/"+modelList[modelIdx]+"/"+modelList[modelIdx]+".npy")
    result = generate_moving_image(G, image, e_vector, "cuda:0", fa)
    print('done')
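base64_cv2 used in imgTransform is not shown. A hypothetical sketch that decodes a base64 string into an OpenCV BGR image (the helper body is an assumption):

import base64

import cv2
import numpy as np


def base64_cv2(encoded):
    """Sketch only: decode a base64-encoded image string into a BGR ndarray."""
    data = np.frombuffer(base64.b64decode(encoded), dtype=np.uint8)
    return cv2.imdecode(data, cv2.IMREAD_COLOR)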