Пример #1
0
 def __init__(self, blaze_weight, anchors, scale: float = 1.0):
     """Create the BlazeFace detector and wrap it in a ``FaceExtractor``.

     Args:
         blaze_weight: Forwarded to ``BlazeFace.load_weights``.
         anchors: Forwarded to ``BlazeFace.load_anchors``.
         scale: The extractor is created with ``margin=scale - 1``.
     """
     super().__init__()
     # ``device`` is not defined in this method; it comes from an outer scope.
     detector = BlazeFace().to(device)
     detector.load_weights(blaze_weight)
     detector.load_anchors(anchors)
     detector.train(False)  # leave training mode
     extra_margin = scale - 1
     self.extractor = FaceExtractor(detector, margin=extra_margin)
Пример #2
0
def _count_extensions(file_names):
    """Map each file extension (without the dot) to the number of names using it.

    Names without an extension are counted under the empty string. Keys keep
    first-seen order.
    """
    counts = {}
    for name in file_names:
        # splitext copes with dot-less and multi-dot names, unlike split('.')[1].
        ext = os.path.splitext(name)[1].lstrip('.')
        counts[ext] = counts.get(ext, 0) + 1
    return counts


def _report_extensions(file_names):
    """Print the distinct extensions in *file_names* and a per-extension count."""
    counts = _count_extensions(file_names)
    print(f"Extensions: {list(counts)}")
    for file_ext, total in counts.items():
        print(f"Files with extension `{file_ext}`: {total}")


def main():
    """Summarise the sample folders, then run face detection on FAKE samples.

    Steps:
      1. Print the file extensions (and their counts) found in the train
         sample folder and in the test folder.
      2. Load the metadata JSON found in the train sample folder.
      3. Draw three random FAKE and three random REAL video names.
      4. Print PyTorch/CUDA/cuDNN versions, load BlazeFace and call
         ``get_frame_faces`` on each sampled FAKE video.

    Raises:
        IndexError: If the train sample folder holds no file ending in ``json``.
    """
    # Extension summary for the training samples.
    train_list = os.listdir(os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER))
    _report_extensions(train_list)

    # Same summary for the test folder. The counts are now taken from the
    # test listing itself (previously they were computed on the train listing).
    test_list = os.listdir(os.path.join(DATA_FOLDER, TEST_FOLDER))
    _report_extensions(test_list)

    json_file = [file for file in train_list if file.endswith('json')][0]
    print(f"JSON file: {json_file}")

    meta_train_df = get_meta_from_json(TRAIN_SAMPLE_FOLDER, json_file)

    fake_train_sample_video = list(
        meta_train_df.loc[meta_train_df.label == 'FAKE'].sample(3).index)
    # NOTE(review): the REAL sample is drawn but not used further in this
    # function; kept so the random draws stay as they were.
    real_train_sample_video = list(
        meta_train_df.loc[meta_train_df.label == 'REAL'].sample(3).index)

    print("PyTorch version:", torch.__version__)
    print("CUDA version:", torch.version.cuda)
    print("cuDNN version:", torch.backends.cudnn.version())

    gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(gpu)

    net = BlazeFace().to(gpu)
    net.load_weights("../input/blazeface.pth")
    net.load_anchors("../input/anchors.npy")

    for video_file in fake_train_sample_video:
        get_frame_faces(
            os.path.join(DATA_FOLDER, TRAIN_SAMPLE_FOLDER, video_file), net)
Пример #3
0
    def detect(self):
        """Run BlazeFace on ``self.img_arr`` and remember the result.

        A new ``BlazeFace`` instance is built on every call and loaded from
        ``self.model_weights`` / ``self.model_anchors``. The image is resized
        to ``(self.img_size, self.img_size)`` before prediction.

        Returns:
            The value returned by ``predict_on_image``; the same object is
            stored in ``self.detections``.
        """
        detector = BlazeFace()
        detector.load_weights(self.model_weights)
        detector.load_anchors(self.model_anchors)

        target_shape = (self.img_size, self.img_size)
        resized = cv2.resize(self.img_arr, target_shape)

        found = detector.predict_on_image(resized)
        self.detections = found
        return found
Пример #4
0
def process_video(video_path, filename, image_path, original):
    """Extract face crops from a video and save them as 224x224 JPEG files.

    Random frames are read from ``video_path``, BlazeFace proposes faces, only
    the best face per frame is kept, and every location that
    ``face_recognition.face_locations`` finds inside that face is cropped,
    resized and written to ``image_path``.

    Args:
        video_path: Path of the video handed to ``FaceExtractor.process_video``.
        filename: Video file name; its extension is stripped to form the
            output name.
        image_path: Directory that receives the JPEG files.
        original: String appended to the file stem in the output name.

    Output files are named ``<stem><original>_<n>.jpg`` where ``n`` counts the
    saved crops from 0.
    """
    import os

    from helpers_read_video_1 import VideoReader
    from helpers_face_extract_1 import FaceExtractor

    gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    facedet = BlazeFace().to(gpu)
    facedet.load_weights("blazeface.pth")
    facedet.load_anchors("anchors.npy")
    _ = facedet.train(False)

    frames_per_video = 10

    video_reader = VideoReader()
    video_read_fn = lambda x: video_reader.read_random_frames(
        x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn, facedet)

    faces = face_extractor.process_video(video_path)
    # Only look at one face per frame.
    face_extractor.keep_only_best_face(faces)

    # Strip whatever extension the name has instead of assuming exactly four
    # trailing characters (".mp4"); ".mpeg"/".webm" names were mangled before.
    stem = os.path.splitext(filename)[0]
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 85]

    n = 0
    for frame_data in faces:
        for face in frame_data["faces"]:
            for top, right, bottom, left in face_recognition.face_locations(
                    face):
                face_image = face[top:bottom, left:right]
                resized_face = cv2.resize(face_image, (224, 224),
                                          interpolation=cv2.INTER_AREA)
                # The colour conversion below treats the crop as RGB and
                # produces the BGR order that cv2.imwrite expects.
                resized_face = cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR)

                out_file = os.path.join(
                    image_path, "{}{}_{}.jpg".format(stem, original, n))
                cv2.imwrite(out_file, resized_face, jpeg_params)

                n += 1
Пример #5
0
    def _load_face_extractor(self):
        """
            Build and return the FaceExtractor (imported from helpers.face_extract_1),
            wired to a frame-reading callable and a BlazeFace detector.

            Side effects: prepends two directories under ``self.root_path`` to
            ``sys.path`` and sets ``self.frames_per_video`` to 17.
        """
        import sys

        # Make both checkouts importable; the one inserted last ends up first.
        for checkout in ("blazeface-pytorch", "deepfakes-inference-demo"):
            sys.path.insert(0, os.path.join(self.root_path, checkout))

        # BlazeFace face detector, based on https://github.com/tkat0/PyTorch_BlazeFace/
        from blazeface import BlazeFace
        detector = BlazeFace().to(self.gpu)
        # Pretrained weights and anchors live next to the detector sources.
        weights_file = os.path.join(self.root_path,
                                    "blazeface-pytorch/blazeface.pth")
        anchors_file = os.path.join(self.root_path,
                                    "blazeface-pytorch/anchors.npy")
        detector.load_weights(weights_file)
        detector.load_anchors(anchors_file)
        # Leave training mode.
        detector.train(False)

        from helpers.read_video_1 import VideoReader
        from helpers.face_extract_1 import FaceExtractor

        # Number of frames requested from each video.
        self.frames_per_video = 17
        reader = VideoReader()

        def read_video(path):
            # frames_per_video is looked up on every call, so a later change
            # of the attribute is honoured.
            return reader.read_frames(path, num_frames=self.frames_per_video)

        return FaceExtractor(read_video, detector)
Пример #6
0
import random
from concurrent.futures import ThreadPoolExecutor

sys.path.insert(1, 'helpers')
sys.path.insert(1, 'model')
sys.path.insert(1, 'weight')

from cvit import CViT
from helpers_read_video_1 import VideoReader
from helpers_face_extract_1 import FaceExtractor

# Use CUDA when available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from blazeface import BlazeFace
# Module-level BlazeFace detector, loaded from the local "helpers" directory.
facedet = BlazeFace().to(device)
facedet.load_weights("helpers/blazeface.pth")
facedet.load_anchors("helpers/anchors.npy")
# Leave training mode; the returned module is discarded.
_ = facedet.train(False)

# Per-channel normalisation constants.
# NOTE(review): these look like the usual ImageNet statistics - confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

normalize_transform = transforms.Compose([transforms.Normalize(mean, std)])

# NOTE(review): meaning/units of this threshold are not visible here.
tresh = 50
# Directory that is scanned for ".mp4" files below.
sample = 'sample__prediction_data/'

# Random integer in [0, 400] and its predecessor (ran == 0 yields 1 because
# of the abs()).
ran = random.randint(0, 400)
ran_min = abs(ran - 1)

filenames = sorted([x for x in os.listdir(sample) if x[-4:] == ".mp4"
Пример #7
0
from blazebase import resize_pad, denormalize_detections
from blazeface import BlazeFace
from blazepalm import BlazePalm
from blazeface_landmark import BlazeFaceLandmark
from blazehand_landmark import BlazeHandLandmark

from visualization import draw_detections, draw_landmarks, draw_roi, HAND_CONNECTIONS, FACE_CONNECTIONS

# Run on the first CUDA device when available, otherwise on the CPU.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Inference only: switch autograd off globally.
torch.set_grad_enabled(False)

# Selects which BlazeFace variant (and matching weight/anchor files) is used.
back_detector = True

face_detector = BlazeFace(back_model=back_detector).to(gpu)
if back_detector:
    face_detector.load_weights("blazefaceback.pth")
    face_detector.load_anchors("anchors_face_back.npy")
else:
    face_detector.load_weights("blazeface.pth")
    face_detector.load_anchors("anchors_face.npy")

# Palm detector with its score threshold overridden to 0.75.
palm_detector = BlazePalm().to(gpu)
palm_detector.load_weights("blazepalm.pth")
palm_detector.load_anchors("anchors_palm.npy")
palm_detector.min_score_thresh = .75

# Landmark models; only weights are loaded for these (no anchors).
hand_regressor = BlazeHandLandmark().to(gpu)
hand_regressor.load_weights("blazehand_landmark.pth")

face_regressor = BlazeFaceLandmark().to(gpu)
face_regressor.load_weights("blazeface_landmark.pth")
Пример #8
0
def main():
    """Extract faces from a set of videos and build the faces index.

    The command line selects the videos DataFrame, the output locations and
    the processing window (``--offset`` / ``--num``). Unless ``--collateonly``
    is given, faces are extracted batch by batch in a thread pool and a
    per-video checkpoint DataFrame is pickled. Unless ``--noindex`` is given,
    all existing checkpoints are then collected, the videos DataFrame is
    re-saved with an ``nfaces`` column, and the merged faces DataFrame is
    written next to ``--facesdf``.

    The faces DataFrame is always saved as
    ``<stem>_from_video_<offset>_to_video_<offset + num>.pkl`` (so a default
    run produces ``..._from_video_0_to_video_0.pkl``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--source',
                        type=Path,
                        help='Videos root directory',
                        required=True)
    # The videos DataFrame is read unconditionally below, so the option is
    # mandatory; omitting it used to fail later inside pd.read_pickle.
    parser.add_argument('--videodf',
                        type=Path,
                        help='Path to read the videos DataFrame',
                        required=True)
    parser.add_argument('--facesfolder',
                        type=Path,
                        help='Faces output root directory',
                        required=True)
    parser.add_argument('--facesdf',
                        type=Path,
                        help='Path to save the output DataFrame of faces',
                        required=True)
    parser.add_argument('--checkpoint',
                        type=Path,
                        help='Path to save the temporary per-video outputs',
                        required=True)

    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument(
        '--device',
        type=torch.device,
        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
        help='Device to use for face extraction')
    parser.add_argument('--collateonly',
                        help='Only perform collation of pre-existing results',
                        action='store_true')
    # store_false: the parsed value is True by default and becomes False when
    # the flag is present, i.e. it directly means "indexing enabled".
    parser.add_argument('--noindex',
                        help='Do not rebuild the index',
                        action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads',
                        type=int,
                        help='Number of threads',
                        default=8)
    parser.add_argument('--offset',
                        type=int,
                        help='Offset to start extraction',
                        default=0)
    parser.add_argument('--num',
                        type=int,
                        help='Number of videos to process',
                        default=0)
    parser.add_argument('--lazycheck',
                        action='store_true',
                        help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck',
                        action='store_true',
                        help='Try to open every image')

    args = parser.parse_args()

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    # num == 0 means "everything from offset onwards".
    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:

        ## Blazeface loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(
            x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        # Everything except the DataFrame row is fixed for the whole run.
        process_row = partial(
            process_video,
            source_dir=source_dir,
            facedestination_dir=facedestination_dir,
            checkpoint_folder=checkpoint_folder,
            face_size=face_size,
            face_extractor=face_extractor,
            lazycheck=lazycheck,
            deepcheck=deepcheck,
        )

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            for batch_idx0 in tqdm(np.arange(start=0,
                                             stop=len(df_videos_process),
                                             step=batch_size),
                                   desc='Extracting faces'):
                batch_rows = df_videos_process.iloc[batch_idx0:batch_idx0 +
                                                    batch_size].iterrows()
                tosave_list = list(p.map(process_row, batch_rows))
                for tosave in tosave_list:
                    if tosave is None:
                        continue
                    # As used here, each result is indexed as
                    # (faces DataFrame, checkpoint path, items for save_jpg).
                    if len(tosave[2]):
                        list(p.map(save_jpg, tosave[2]))
                    tosave[1].parent.mkdir(parents=True, exist_ok=True)
                    tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(),
                                total=len(df_videos),
                                desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(
                record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(
                        str(video_face_checkpoint_path))
                    # Fix same attribute issue
                    df_video_faces = df_video_faces.rename(
                        columns={'subject': 'videosubject'}, errors='ignore')
                    # The subject id is parsed from the index entry, between
                    # '_subj' and '.jpg'; nfaces is the number of distinct ids.
                    nfaces = len(
                        np.unique(
                            df_video_faces.index.map(lambda x: int(
                                x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    # Best effort: report the unreadable checkpoint and remove
                    # it so that a later run can regenerate it.
                    print('Error while reading: {}'.format(
                        video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        # offset and num are ints defaulting to 0 (never None), so the output
        # name always carries the processed range. The former
        # "is not None" alternatives could not be reached and were dropped.
        facesdataset_path = facesdataset_path.parent.joinpath(
            str(facesdataset_path.parts[-1]).split('.')[0] +
            '_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))

        # Creates directory (if doesn't exist)
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))
        df_faces = pd.concat(
            faces_dataset,
            axis=0,
        )
        df_faces['video'] = df_faces['video'].astype('category')
        # kp1x, kp1y, ..., kp6x, kp6y followed by the box columns.
        int16_columns = [
            'kp{}{}'.format(kp, axis) for kp in range(1, 7) for axis in 'xy'
        ] + ['left', 'top', 'right', 'bottom']
        for key in int16_columns:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Eventually remove duplicates
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        fields_to_preserve_from_video = [
            i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces']
            if i in df_videos
        ]
        df_faces = pd.merge(df_faces,
                            df_videos[fields_to_preserve_from_video],
                            left_on='video',
                            right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
Пример #9
0
from blazeface import BlazeFace
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
import random
import pickle

# Dataset root and the two sub-folder names used with it.
DATA_FOLDER = '../input/deepfake-detection-challenge'
TRAIN_SAMPLE_FOLDER = 'train_sample_videos'
TEST_FOLDER = 'test_videos'

# First CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Module-level BlazeFace detector, created and loaded at import time.
NET = BlazeFace().to(device)
NET.load_weights("../input/blazeface.pth")
NET.load_anchors("../input/anchors.npy")


class MyLSTM(nn.Module):
    def __init__(self, num_layers=2, num_hidden_nodes=512):
        super(MyLSTM, self).__init__()
        self.num_layers = num_layers
        self.num_hidden_nodes = num_hidden_nodes

        # input dim is 167, output 200
        self.lstm = nn.LSTM(167,
                            num_hidden_nodes,
                            batch_first=True,
                            num_layers=num_layers)
        # fully connected
Пример #10
0
def run_nb(modelname):
    """Score one REAL and one FAKE sample video and plot per-frame results.

    The network named ``modelname`` is looked up on ``fornet``, its weights
    are downloaded from ``weights.weight_url`` (training set fixed to DFDC),
    faces are extracted with BlazeFace from two sample videos, and for every
    frame with at least one face the first face is scored.

    Args:
        modelname: Attribute name on ``fornet``; per the original notes one of
            EfficientNetB4, EfficientNetB4ST, EfficientNetAutoAttB4,
            EfficientNetAutoAttB4ST, Xception.

    Runs on ``cuda:0`` when available and on the CPU otherwise. The notebook
    leftovers that queried ``torch.cuda`` unconditionally were removed: their
    results were discarded and ``current_device`` / ``get_device_name`` raise
    on hosts without CUDA, which defeated the CPU fallback.
    """
    ## Parameters
    net_model = modelname
    # Training dataset of the pretrained weights: 'DFDC' or 'FFPP'.
    train_db = 'DFDC'

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    face_policy = 'scale'
    face_size = 224
    frames_per_video = 32

    ## Initialization
    print('=' * 20)
    model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
    print('=' * 20)
    net = getattr(fornet, net_model)().eval().to(device)
    print('=' * 20)
    net.load_state_dict(
        load_url(model_url, map_location=device, check_hash=True))

    transf = utils.get_transformer(face_policy,
                                   face_size,
                                   net.get_normalizer(),
                                   train=False)

    facedet = BlazeFace().to(device)
    facedet.load_weights("../blazeface/blazeface.pth")
    facedet.load_anchors("../blazeface/anchors.npy")
    videoreader = VideoReader(verbose=False)
    video_read_fn = lambda x: videoreader.read_frames(
        x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn=video_read_fn,
                                   facedet=facedet)

    ## Detect faces
    vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
    vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

    im_real_face = vid_real_faces[0]['faces'][0]
    im_fake_face = vid_fake_faces[0]['faces'][0]

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))

    ax[0].imshow(im_real_face)
    ax[0].set_title('REAL')

    ax[1].imshow(im_fake_face)
    ax[1].set_title('FAKE')

    ## Predict scores for each frame

    def stack_first_faces(frames):
        # For each frame, we consider the face with the highest confidence
        # score found by BlazeFace (= frame['faces'][0]); frames without
        # faces are skipped.
        return torch.stack([
            transf(image=frame['faces'][0])['image'] for frame in frames
            if len(frame['faces'])
        ])

    faces_real_t = stack_first_faces(vid_real_faces)
    faces_fake_t = stack_first_faces(vid_fake_faces)

    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
        faces_fake_pred = net(faces_fake_t.to(device)).cpu().numpy().flatten()

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    # NOTE(review): use_line_collection is deprecated in recent matplotlib
    # releases - confirm against the pinned version before changing it.
    ax[0].stem([f['frame_idx'] for f in vid_real_faces if len(f['faces'])],
               expit(faces_real_pred),
               use_line_collection=True)
    ax[0].set_title('REAL')
    ax[0].set_xlabel('Frame')
    ax[0].set_ylabel('Score')
    ax[0].set_ylim([0, 1])
    ax[0].grid(True)

    ax[1].stem([f['frame_idx'] for f in vid_fake_faces if len(f['faces'])],
               expit(faces_fake_pred),
               use_line_collection=True)
    ax[1].set_title('FAKE')
    ax[1].set_xlabel('Frame')
    ax[1].set_ylabel('Score')
    ax[1].set_ylim([0, 1])
    # NOTE(review): the second positional argument is only treated as tick
    # labels by newer matplotlib versions - verify.
    ax[1].set_yticks([0, 1], ['REAL', 'FAKE'])

    # Print average scores.
    # An average score close to 0 predicts REAL, close to 1 predicts FAKE.
    # The mean is taken over the raw outputs and then passed through expit.
    print('Average score for REAL video: {:.4f}'.format(
        expit(faces_real_pred.mean())))
    print('Average score for FAKE face: {:.4f}'.format(
        expit(faces_fake_pred.mean())))
Пример #11
0
import torch.nn as nn

from network.models import model_selection

# Directory with the test videos; only names ending in ".mp4" are kept, sorted.
test_dir = "/home/dchen/DFDC/test_videos/"
test_videos = sorted([x for x in os.listdir(test_dir) if x[-4:] == ".mp4"])

# NOTE(review): the CUDA device index 3 is hard-coded - confirm it matches
# the target machine.
gpu = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

import sys
# Make the two local checkouts importable before the imports below.
sys.path.insert(0, "/home/dchen/DFDC/blazeface-pytorch")
sys.path.insert(0, "/home/dchen/DFDC/deepfakes-inference-demo")

from blazeface import BlazeFace
# Module-level BlazeFace detector.
facedet = BlazeFace().to(gpu)
facedet.load_weights("/home/dchen/DFDC/blazeface-pytorch/blazeface.pth")
facedet.load_anchors("/home/dchen/DFDC/blazeface-pytorch/anchors.npy")
# Leave training mode; the returned module is discarded.
_ = facedet.train(False)

from helpers.read_video_1 import VideoReader
from helpers.face_extract_1 import FaceExtractor

# Number of frames requested from every video.
frames_per_video = 16

video_reader = VideoReader()
# frames_per_video is looked up when the lambda runs, not when it is defined.
video_read_fn = lambda x: video_reader.read_frames(x,
                                                   num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn, facedet)

# NOTE(review): presumably the side length expected by the classifier input;
# its use is outside this view.
input_size = 299
Пример #12
0
            offset = 4 + k * 2
            # raw_boxes[:, offset    ] = (raw_boxes[:, offset    ] / 128.0) * self.anchors[:, 2] + self.anchors[:, 0] # x
            # raw_boxes[:, offset + 1] = (raw_boxes[:, offset + 1] / 128.0) * self.anchors[:, 3] + self.anchors[:, 1] # y
            concat_stuff.append((raw_boxes[:, offset] / 128.0) *
                                self.anchors[:, 2] + self.anchors[:, 0])
            concat_stuff.append((raw_boxes[:, offset + 1] / 128.0) *
                                self.anchors[:, 3] + self.anchors[:, 1])

        return torch.stack(concat_stuff, dim=-1)


import coremltools as ct
from coremltools.converters.onnx import convert

# Load BlazeFace (left on its default device) with weights and anchors taken
# from the working directory.
bfModel = BlazeFace()
bfModel.load_weights("./blazeface.pth")
bfModel.load_anchors("./anchors.npy")

# Wrap the detector and switch the wrapper to evaluation mode before tracing.
bfs = BlazeFaceScaled(bfModel)
bfs.eval()

# Trace with a random 1x3x128x128 example input; check_trace=True asks
# torch.jit.trace to verify the traced result.
traced_model = torch.jit.trace(bfs,
                               torch.rand(1, 3, 128, 128),
                               check_trace=True)
# print(traced_model)
mlmodel = ct.convert(traced_model,
                     inputs=[
                         ct.ImageType(name="image",
                                      shape=ct.Shape(shape=(
                                          1,
                                          3,
Пример #13
0
print('=' * 20)
# Instantiate the network class named by net_model, in eval mode, on device.
net = getattr(fornet, net_model)().eval().to(device)
print('=' * 20)
# Download the pretrained weights (hash-checked) and load them.
net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))

# In[5]:

# Preprocessing for inference (train=False), using the network's normalizer.
transf = utils.get_transformer(face_policy,
                               face_size,
                               net.get_normalizer(),
                               train=False)

# In[6]:

# BlazeFace detector plus a reader that requests frames_per_video frames.
facedet = BlazeFace().to(device)
facedet.load_weights("../blazeface/blazeface.pth")
facedet.load_anchors("../blazeface/anchors.npy")
videoreader = VideoReader(verbose=False)
video_read_fn = lambda x: videoreader.read_frames(x,
                                                  num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

# ## Detect faces

# In[7]:

# Notebook leftover: the result is discarded when run as a script.
torch.cuda.is_available()

# In[8]:

# Notebook leftover: the result is discarded.
# NOTE(review): presumably raises on hosts without CUDA - verify before
# running on CPU.
torch.cuda.current_device()
Пример #14
0
    with ThreadPoolExecutor(max_workers=num_workers) as ex:
        meta = tqdm(ex.map(process_file, range(len(df))), total=len(df))

    return pd.DataFrame(meta)


if __name__ == '__main__':
    # Script entry point: read the CLI arguments and the YAML config, build
    # the face extractor, run the preprocessing over the metadata table and
    # write the resulting faces metadata as CSV.
    args = parser.parse_args()
    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if args.df is None:
        parser.error('Need to specify metadata file')
    with open(args.config) as f:
        # safe_load: a bare yaml.load(f) without a Loader is rejected by
        # PyYAML 6 and can construct arbitrary objects on older versions.
        # NOTE(review): assumes the config uses only plain YAML types.
        config = yaml.safe_load(f)
    device = torch.device('cuda:{}'.format(args.gpu))

    df = pd.read_csv(args.df)
    path = config['data_path']
    # Facedet
    facedet = BlazeFace().to(device)
    facedet.load_weights("./dfdet/BlazeFace/blazeface.pth")
    facedet.load_anchors("./dfdet/BlazeFace/anchors.npy")
    _ = facedet.train(False)
    # Frame reader; the frame count per video comes from the config.
    video_reader = VideoReader()

    def video_read_fn(x):
        return video_reader.read_frames(x, num_frames=config['n_frames'])

    # Bound at module level; not passed explicitly to the call below.
    face_extractor = FaceExtractor(video_read_fn, facedet)
    faces_dataframe = preprocess_on_video_set(df, 4)
    faces_dataframe.to_csv('{}/faces_metadata.csv'.format(config['out_path']))
Пример #15
0
class MouthDetector():
    """Crop the region below the nose out of frames using BlazeFace detections.

    Detection rows are read as follows by every method of this class:
    columns 0-3 are ymin, xmin, ymax, xmax; columns ``4 + 2*k`` and
    ``4 + 2*k + 1`` are the x and y of keypoint ``k``; column 16 is used as
    the drawing alpha in ``plot_detections``. All coordinates are multiplied
    by an image size, i.e. they are treated as normalized values.
    """

    def __init__(self, device):
        """Load the BlazeFace network and its anchors onto ``device``.

        The weight and anchor files are read from the current working
        directory ("blazeface.pth", "anchors.npy").
        """
        self.net = BlazeFace().to(device)
        self.net.load_weights("blazeface.pth")
        self.net.load_anchors("anchors.npy")

        self.mouth_region_size = (64,64)
        self.img_dims = (128, 128)

    def plot_detections(self, img, detections, with_keypoints=True):
        """Display ``img`` with one rectangle per detection drawn on top.

        Args:
            img: image array; divided by 255 before being shown.
            detections: torch tensor or NumPy array, one row per detection
                (a single 1-D row is also accepted).
            with_keypoints: when true, also draw keypoint 2 (the nose) of
                every detection.
        """
        _, ax = plt.subplots(1, figsize=(10, 10))
        ax.grid(False)
        ax.imshow(img/255.)

        if isinstance(detections, torch.Tensor):
            detections = detections.cpu().numpy()

        # A single detection may arrive as a flat row; promote it to 2-D.
        if detections.ndim == 1:
            detections = np.expand_dims(detections, axis=0)

        print("Found %d faces" % detections.shape[0])

        img_h, img_w = img.shape[0], img.shape[1]
        nose = 2  # index of the only keypoint that is drawn
        for detection in detections:
            ymin = detection[0] * img_h
            xmin = detection[1] * img_w
            ymax = detection[2] * img_h
            xmax = detection[3] * img_w

            rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                     linewidth=1, edgecolor="r", facecolor="none",
                                     alpha=detection[16])
            ax.add_patch(rect)
            print(ymin, ymax, xmin, xmax)
            if with_keypoints:
                kp_x = detection[4 + nose*2    ] * img_w
                kp_y = detection[4 + nose*2 + 1] * img_h
                circle = patches.Circle((kp_x, kp_y), radius=0.5, linewidth=1,
                                        edgecolor="lightskyblue", facecolor="none",
                                        alpha=detection[16])
                ax.add_patch(circle)

        plt.show()

    def mouth_detection(self, img, detections, with_keypoints=True, img_dims=(128,128)):
        """Return the crop of ``img`` below the nose of the first detection.

        Args:
            img: image indexable as ``img[rows, cols]``.
            detections: torch tensor or NumPy array of detections (a single
                1-D row is also accepted); only the first row is used.
            with_keypoints: unused; kept for interface compatibility.
            img_dims: (height, width) used to scale the normalized
                coordinates to pixels.

        Returns:
            ``img[nose_y:ymax, xmin:xmax]`` with the bounds truncated to int.
        """
        if isinstance(detections, torch.Tensor):
            detections = detections.cpu().numpy()

        if detections.ndim == 1:
            detections = np.expand_dims(detections, axis=0)

        print("Found %d faces" % detections.shape[0])
        face = 0  # first face detection
        nose = 2  # nose keypoint
        xmin = detections[face, 1] * img_dims[1]
        ymax = detections[face, 2] * img_dims[0]
        xmax = detections[face, 3] * img_dims[1]

        kp_x = detections[face, 4 + nose*2    ] * img_dims[1]
        kp_y = detections[face, 4 + nose*2 + 1] * img_dims[0]

        print('########')
        print(kp_y, kp_x)

        # Rows from the nose down to the box bottom, full box width.
        return img[int(kp_y):int(ymax), int(xmin):int(xmax)]

    def batch_mouth_detection(self, frames, detections, with_keypoints=True, img_dims= (128, 128)):
        """Return the mouth regions for a batch of frames plus a skip flag.

        Args:
            frames: sequence of frames (torch tensors or NumPy arrays), each
                indexable as ``frame[rows, cols]``.
            detections: per-frame detections, indexed in parallel to
                ``frames``; only the first detection of a frame is used.
            with_keypoints: unused; kept for interface compatibility.
            img_dims: (height, width) used to scale the normalized
                coordinates to pixels.
                NOTE(review): frames are cropped as-is, so they are assumed
                to already have this size -- confirm against the caller.

        Returns:
            (mouth_regions, flag): ``mouth_regions`` is an array of crops
            resized to ``self.mouth_region_size``; ``flag`` is True when at
            least one frame produced no crop, else False.
        """
        face = 0  # first face detection
        nose = 2  # nose keypoint
        mouth_regions = []
        for index, img in enumerate(frames):
            if len(detections[index]) == 0:
                # No face in this frame: it is skipped (reflected in flag).
                continue
            try:
                detection = detections[index]
                xmin = detection[face, 1] * img_dims[1]
                ymax = detection[face, 2] * img_dims[0]
                xmax = detection[face, 3] * img_dims[1]
                kp_y = detection[face, 4 + nose*2 + 1] * img_dims[0]

                mouth_region = img[int(kp_y):int(ymax), int(xmin):int(xmax)]
                # Accept tensors as well as arrays, like the other methods do.
                if isinstance(mouth_region, torch.Tensor):
                    mouth_region = mouth_region.cpu().numpy()
                mouth_regions.append(resize(mouth_region, self.mouth_region_size))
            except IndexError:
                # Malformed detection: stop here; the length mismatch below
                # reports the batch as incomplete.
                break

        flag = len(frames) != len(mouth_regions)
        return np.array(mouth_regions), flag


    
########################## old code


# class VideoHandler(object):
#     def __init__(self, filepaths):
#         self.paths = filepaths
#         self.mp4_filenames = filepaths
#         self.blaze_detector = MouthDetector()
# #         self.mouth_extractor = FaceROIExtractor()
    
#     def read_video_audio_dyn(self, video_path):
#         # print(video_path, audio_path)
#         clip = VideoFileClip(video_path, verbose=False)
#         video_frames = torch.FloatTensor(list(clip.iter_frames()))
#         # video_frames = torch.FloatTensor(list(imageio.get_reader(video_path, 'ffmpeg')))

#         # waveform, sample_rate = torchaudio.load(audio_path)
#         waveform = torch.from_numpy(
#             clip.audio.to_soundarray()).float().permute(1, 0)
#         specgram = torchaudio.transforms.MelSpectrogram()(waveform)
#         return specgram, video_frames

#     def read_video_audio_blaze_roi(self, video_path, frame_len, subdir='train', audio_path=None):

#         frames = [] 
#         mouth_frames  = []
#         mouth_indices = []
#         video_frames = []
#         cap = cv2.VideoCapture(video_path)
#         frame_counter = 0
#         while cap.isOpened():
#             ret, frame = cap.read()
#             if ret:
#                 frame = resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (256,256))*255
                
#                 frames.append(frame.astype(np.uint8))
#                 frame_counter += 1
#             else:
#                 break
#             if frame_counter == 31: # frame length
#                 break
#         cap.release()

#         frames = np.array(frames)
#         frames = (frames - frames.min()) / (frames.max() - frames.min())
#         img = torch.from_numpy(frames).permute(0,3,1,2) * 255
#         detections = self.blaze_detector.net.predict_on_batch(img)
#         mouth_regions, flag = self.blaze_detector.batch_mouth_detection(img.permute(0,2,3,1), detections)
        
#         if not flag:
#             mouth_regions /= 255.
#         return frames, mouth_regions, flag        





# class MouthDetector():

#     def __init__(self):
#         self.net = BlazeFace().to(device)
#         self.net.load_weights("blazeface.pth")
#         self.net.load_anchors("anchors.npy")

#         self.mouth_region_size = (64,64)
#         self.img_dims = (128, 128)

#     def plot_detections(self, img, detections, with_keypoints=True):
#         fig, ax = plt.subplots(1, figsize=(10, 10))
#         ax.grid(False)
#         ax.imshow(img/255.)
        
#         if isinstance(detections, torch.Tensor):
#             detections = detections.cpu().numpy()

#         if detections.ndim == 1:
#             detections = np.expand_dims(detections, axis=0)

#         print("Found %d faces" % detections.shape[0])
            
#         for i in range(detections.shape[0]):
#             ymin = detections[i, 0] * img.shape[0]
#             xmin = detections[i, 1] * img.shape[1]
#             ymax = detections[i, 2] * img.shape[0]
#             xmax = detections[i, 3] * img.shape[1]

#             rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
#                                      linewidth=1, edgecolor="r", facecolor="none", 
#                                      alpha=detections[i, 16])
#             ax.add_patch(rect)
#             print(ymin, ymax, xmin, xmax)
#             if with_keypoints:
#                 for k in range(2,3):
#                     kp_x = detections[i, 4 + k*2    ] * img.shape[1]
#                     kp_y = detections[i, 4 + k*2 + 1] * img.shape[0]
#                     circle = patches.Circle((kp_x, kp_y), radius=0.5, linewidth=1, 
#                                             edgecolor="lightskyblue", facecolor="none", 
#                                             alpha=detections[i, 16])
#                     ax.add_patch(circle)
            
#         plt.show()



#     def mouth_detection(self, img, detections, with_keypoints=True, img_dims=(128,128)):
#         # fig, ax = plt.subplots(1, figsize=(10, 10))
#         # ax.grid(False)
#         # ax.imshow(img/255.)
        
#         if isinstance(detections, torch.Tensor):
#             detections = detections.cpu().numpy()

#         if detections.ndim == 1:
#             detections = np.expand_dims(detections, axis=0)

#         print("Found %d faces" % detections.shape[0])
#         i = 0 # first face detection
#         k = 2 # nose keypoint
#         # for i in range(detections.shape[0]): #for all faces
#         ymin = detections[i, 0] * img_dims[0]
#         xmin = detections[i, 1] * img_dims[1]
#         ymax = detections[i, 2] * img_dims[0]
#         xmax = detections[i, 3] * img_dims[1]

#         # print(xmin, xmax, ymin, ymax)
#         # for k in range(2,3):  #for all keypoints
#         kp_x = detections[i, 4 + k*2    ] * img_dims[1]
#         kp_y = detections[i, 4 + k*2 + 1] * img_dims[0]

#         print('########')
#         print(kp_y, kp_x)

#         mouth_region = img[int(kp_y):int(ymax), int(xmin):int(xmax)]
#         return mouth_region

#     def batch_mouth_detection(self, frames, detections, with_keypoints=True, img_dims= (128, 128)):

#         """
#             return mouth regions for a batch of frames along with status if any frame was skipped while keypoint finding
#             mouth_regions: mouth rois
#             flag: boolean if a frame is skipped ; True if a frame is skipped else False 
#         """
#         resize_frames = []
#         for frame in frames:
#             if frame.shape[0] !=self.img_dims[0] or frame.shape[1] != self.img_dims[1]:
#                 frame = resize(frame, self.img_dims)
#                 resize_frames.append(frame)
#             else:
#                 resize_frames.append(frame.numpy())
#         # print(len(resize_frames))

#         frames = torch.from_numpy(np.array(resize_frames))

#         if isinstance(detections, torch.Tensor):
#             detections = detections.cpu().numpy()

#         # if len(detections) == 2:
#         #     detections = np.expand_dims(detections, axis=1)

#         # print("Found %d faces" % detections.shape[0])
#         i = 0 # first face detection
#         k = 2 # nose keypoint
#         # for i in range(detections.shape[0]): #for all faces
#         # print(len(detections))
#         # print('########')
#         # print(kp_y, kp_x)
#         mouth_regions = []
#         for index, img in enumerate(frames):
#             if len(detections[index]) > 0:
#                 try:
#                     ymin = detections[index][i, 0] * img_dims[0]
#                     xmin = detections[index][i, 1] * img_dims[1]
#                     ymax = detections[index][i, 2] * img_dims[0]
#                     xmax = detections[index][i, 3] * img_dims[1]

#                     # print(xmin, xmax, ymin, ymax)
#                     # for k in range(2,3):  #for all keypoints
#                     kp_x = detections[index][i, 4 + k*2    ] * img_dims[1]
#                     kp_y = detections[index][i, 4 + k*2 + 1] * img_dims[0]

#                     mouth_region = img[int(kp_y):int(ymax), int(xmin):int(xmax)]
#                     mouth_regions.append(resize(mouth_region, self.mouth_region_size))
#                 except IndexError:
#                     flag = True
#                     break
#             else:
#                 flag = True

#         if len(frames) == len(mouth_regions):
#             flag = False   
#         else: 
#             flag = True
#         # print(len(mouth_region_size))
#         return np.array(mouth_regions), flag
Пример #16
0
import numpy as np
import torch
import cv2
from blazeface import BlazeFace

# some useful info
print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())

# Run on the first CUDA device when available, otherwise on the CPU.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(gpu)

net = BlazeFace().to(gpu)
net.load_weights("blazeface.pth")
net.load_anchors("anchors.npy")
# let's start the capture now
print("starting camera now....")

#adjust based on your device.  For most cases, normally 0
cap = cv2.VideoCapture(0)

while (True):
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        # No frame was grabbed; cvtColor below would fail on the missing
        # frame, so stop the capture loop instead.
        print("failed to read a frame from the camera, stopping")
        break

    # Our operations on the frame come here
    #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # The detector is fed a 128x128 RGB image.
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (128, 128))
    detections = net.predict_on_image(img)

# Only reached after a failed read: hand the camera back.
cap.release()
Пример #17
0
def main(argv):
    """Extract faces from the videos of a pickled DataFrame and collate the results.

    Two phases, each optional:

    1. Unless ``args.collateonly`` is set, run ``process_video`` over the
       selected slice of videos in a thread pool, save the produced JPEGs
       and pickle one checkpoint per video.
    2. When ``index_enable`` is true, read every per-video checkpoint found
       under the checkpoint folder, store the face count per video back into
       the videos DataFrame (re-pickled in place), and write a single faces
       DataFrame whose file name encodes the ``offset``/``num`` slice.

    Args:
        argv: command-line arguments, forwarded to ``parse_args``.

    Raises:
        ValueError: if indexing is enabled and no checkpoint could be read.
    """
    args = parse_args(argv)

    ## Parameters parsing
    device: torch.device = args.device
    source_dir: Path = args.source
    facedestination_dir: Path = args.facesfolder
    frames_per_video: int = args.fpv
    videodataset_path: Path = args.videodf
    facesdataset_path: Path = args.facesdf
    collateonly: bool = args.collateonly
    batch_size: int = args.batch
    threads: int = args.threads
    offset: int = args.offset
    num: int = args.num
    lazycheck: bool = args.lazycheck
    deepcheck: bool = args.deepcheck
    checkpoint_folder: Path = args.checkpoint
    # NOTE(review): the "enable" flag is read from args.noindex; presumably
    # the option is declared with store_false -- confirm in parse_args.
    index_enable: bool = args.noindex

    ## Parameters
    face_size = 512

    print('Loading video DataFrame')
    df_videos = pd.read_pickle(videodataset_path)

    # Select the slice of videos to process: `num` rows starting at `offset`,
    # or everything from `offset` on when `num` is not positive.
    if num > 0:
        df_videos_process = df_videos.iloc[offset:offset + num]
    else:
        df_videos_process = df_videos.iloc[offset:]

    if not collateonly:

        ## Blazeface loading
        print('Loading face extractor')
        facedet = BlazeFace().to(device)
        facedet.load_weights("blazeface/blazeface.pth")
        facedet.load_anchors("blazeface/anchors.npy")
        videoreader = VideoReader(verbose=False)
        video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
        face_extractor = FaceExtractor(video_read_fn, facedet)

        ## Face extraction
        with ThreadPoolExecutor(threads) as p:
            # Walk the selected videos in chunks of `batch_size` rows; each
            # chunk is processed in parallel on the pool.
            for batch_idx0 in tqdm(np.arange(start=0, stop=len(df_videos_process), step=batch_size),
                                   desc='Extracting faces'):
                tosave_list = list(p.map(partial(process_video,
                                                 source_dir=source_dir,
                                                 facedestination_dir=facedestination_dir,
                                                 checkpoint_folder=checkpoint_folder,
                                                 face_size=face_size,
                                                 face_extractor=face_extractor,
                                                 lazycheck=lazycheck,
                                                 deepcheck=deepcheck,
                                                 ),
                                         df_videos_process.iloc[batch_idx0:batch_idx0 + batch_size].iterrows()))

                # Each non-None result is used as a 3-item sequence:
                # [0] object pickled as the checkpoint, [1] checkpoint path,
                # [2] items handed one by one to save_jpg.
                for tosave in tosave_list:
                    if tosave is not None:
                        if len(tosave[2]):
                            list(p.map(save_jpg, tosave[2]))
                        tosave[1].parent.mkdir(parents=True, exist_ok=True)
                        tosave[0].to_pickle(str(tosave[1]))

    if index_enable:
        # Collect checkpoints
        df_videos['nfaces'] = np.zeros(len(df_videos), np.uint8)
        faces_dataset = []
        for idx, record in tqdm(df_videos.iterrows(), total=len(df_videos), desc='Collecting faces results'):
            # Checkpoint
            video_face_checkpoint_path = checkpoint_folder.joinpath(record['path']).with_suffix('.faces.pkl')
            if video_face_checkpoint_path.exists():
                try:
                    df_video_faces = pd.read_pickle(str(video_face_checkpoint_path))
                    # Fix same attribute issue
                    df_video_faces = df_video_faces.rename(columns={'subject': 'videosubject'}, errors='ignore')
                    # Number of distinct integers found between '_subj' and
                    # '.jpg' in the index entries of this video's faces.
                    nfaces = len(
                        np.unique(df_video_faces.index.map(lambda x: int(x.split('_subj')[1].split('.jpg')[0]))))
                    df_videos.loc[idx, 'nfaces'] = nfaces
                    faces_dataset.append(df_video_faces)
                except Exception as e:
                    # An unreadable/malformed checkpoint is reported and then
                    # deleted from disk.
                    print('Error while reading: {}'.format(video_face_checkpoint_path))
                    print(e)
                    video_face_checkpoint_path.unlink()

        if len(faces_dataset) == 0:
            raise ValueError(f'No checkpoint found from face extraction. '
                             f'Is the the source path {source_dir} correct for the videos in your dataframe?')

        # Save videos with updated faces
        print('Saving videos DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

        # Derive the output file name from the processed slice. When
        # `facesdataset_path` is a directory a default 'faces_df...' name is
        # created inside it; otherwise the slice suffix is appended to the
        # given file name cut at its first '.'.
        if offset > 0:
            if num > 0:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath(
                        'faces_df_from_video_{}_to_video_{}.pkl'.format(offset, num + offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}_to_video_{}.pkl'.format(offset,
                                                                                                                 num + offset))
            else:
                if facesdataset_path.is_dir():
                    facesdataset_path = facesdataset_path.joinpath('faces_df_from_video_{}.pkl'.format(offset))
                else:
                    facesdataset_path = facesdataset_path.parent.joinpath(
                        str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}.pkl'.format(offset))
        elif num > 0:
            if facesdataset_path.is_dir():
                facesdataset_path = facesdataset_path.joinpath(
                    'faces_df_from_video_{}_to_video_{}.pkl'.format(0, num))
            else:
                facesdataset_path = facesdataset_path.parent.joinpath(
                    str(facesdataset_path.parts[-1]).split('.')[0] + '_from_video_{}_to_video_{}.pkl'.format(0, num))
        else:
            if facesdataset_path.is_dir():
                facesdataset_path = facesdataset_path.joinpath('faces_df.pkl')  # just a check if the path is a dir

        # Creates directory (if doesn't exist)
        facesdataset_path.parent.mkdir(parents=True, exist_ok=True)
        print('Saving faces DataFrame to {}'.format(facesdataset_path))
        df_faces = pd.concat(faces_dataset, axis=0, )
        # Down-cast columns to compact dtypes before pickling.
        df_faces['video'] = df_faces['video'].astype('category')
        for key in ['kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x',
                    'kp3y', 'kp4x', 'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y', 'left',
                    'top', 'right', 'bottom', ]:
            df_faces[key] = df_faces[key].astype(np.int16)
        df_faces['videosubject'] = df_faces['videosubject'].astype(np.int8)
        # Eventually remove duplicates
        df_faces = df_faces.loc[~df_faces.index.duplicated(keep='first')]
        # Copy over whichever of these video-level columns exist in df_videos,
        # joining each face's 'video' value against the df_videos index.
        fields_to_preserve_from_video = [i for i in ['folder', 'subject', 'scene', 'cluster', 'nfaces'] if
                                         i in df_videos]
        df_faces = pd.merge(df_faces, df_videos[fields_to_preserve_from_video], left_on='video',
                            right_index=True)
        df_faces.to_pickle(str(facesdataset_path))

    print('Completed!')
Пример #18
0
import numpy as np
import torch
import cv2
import sys

from blazeface import BlazeFace

# Use the first CUDA device when one is available, the CPU otherwise.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Autograd is switched off globally for everything below.
torch.set_grad_enabled(False)

back_detector = True

# Pick the (weights, anchors) file pair that matches the selected variant.
if back_detector:
    weights_file, anchors_file = "blazefaceback.pth", "anchors_face_back.npy"
else:
    weights_file, anchors_file = "blazeface.pth", "anchors_face.npy"

model = BlazeFace(back_model=back_detector).to(gpu)
model.load_weights(weights_file)
model.load_anchors(anchors_file)

##############################################################################
batch_size = 1
#height = 128
#width = 128
height = 256
width = 256
# Dummy input laid out as (batch, height, width, 3): random floats cast to
# bytes, then moved to the selected device.
input_shape = (batch_size, height, width, 3)
random_input = torch.randn(input_shape, requires_grad=True)
x = random_input.byte().to(gpu)
opset = 12
##############################################################################