Example #1
import argparse
import os

import cv2
import mmcv
import numpy as np
import torch
import face_alignment
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN  # assumed source of MTCNN in this snippet

# `utils`, `face2head` and `bb_intersection_over_union` are project-local
# helpers not shown here (a sketch of the latter two follows the example).


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_input_path', type=str, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    parser.add_argument('--detect_every_N_frame', type=int, default=8)
    parser.add_argument('--scalar_face_detection', type=float, default=1.5)
    parser.add_argument('--number_of_speakers', type=int, default=2)
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))
    utils.mkdirs(os.path.join(args.output_path, 'faces'))

    landmarks_dic = {}
    faces_dic = {}
    boxes_dic = {}
    for i in range(args.number_of_speakers):
        landmarks_dic[i] = []
        faces_dic[i] = []
        boxes_dic[i] = []

    mtcnn = MTCNN(keep_all=True, device=device)

    video = mmcv.VideoReader(args.video_input_path)
    print("Video statistics: ", video.width, video.height, video.resolution,
          video.fps)
    frames = [
        Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        for frame in video
    ]
    print('Number of frames in video: ', len(frames))
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False)

    for i, frame in enumerate(frames):
        print('\rTracking frame: {}'.format(i + 1), end='')

        # Detect faces
        if i % args.detect_every_N_frame == 0:
            boxes, _ = mtcnn.detect(frame)
            boxes = boxes[:args.number_of_speakers]
            boxes = face2head(boxes, args.scalar_face_detection)
        else:
            boxes = [boxes_dic[j][-1] for j in range(args.number_of_speakers)]

        # Crop faces and save landmarks for each speaker
        if len(boxes) != args.number_of_speakers:
            boxes = [boxes_dic[j][-1] for j in range(args.number_of_speakers)]

        for j, box in enumerate(boxes):
            face = frame.crop((box[0], box[1], box[2], box[3])).resize(
                (224, 224))
            preds = fa.get_landmarks(np.array(face))
            if i == 0:
                faces_dic[j].append(face)
                landmarks_dic[j].append(preds)
                boxes_dic[j].append(box)
            else:
                iou_scores = []
                for b_index in range(args.number_of_speakers):
                    last_box = boxes_dic[b_index][-1]
                    iou_score = bb_intersection_over_union(box, last_box)
                    iou_scores.append(iou_score)
                box_index = iou_scores.index(max(iou_scores))
                faces_dic[box_index].append(face)
                landmarks_dic[box_index].append(preds)
                boxes_dic[box_index].append(box)

    for s in range(args.number_of_speakers):
        frames_tracked = []
        for i, frame in enumerate(frames):
            # Draw faces
            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            draw.rectangle(boxes_dic[s][i], outline=(255, 0, 0), width=6)
            # Add to frame list
            frames_tracked.append(frame_draw)
        dim = frames_tracked[0].size
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_tracked = cv2.VideoWriter(
            os.path.join(args.output_path,
                         'video_tracked' + str(s + 1) + '.mp4'), fourcc, 25.0,
            dim)
        for frame in frames_tracked:
            video_tracked.write(
                cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        video_tracked.release()

    # Save landmarks
    for i in range(args.number_of_speakers):
        utils.save2npz(os.path.join(args.output_path, 'landmark',
                                    'speaker' + str(i + 1) + '.npz'),
                       data=landmarks_dic[i])
        dim = face.size
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        speaker_video = cv2.VideoWriter(
            os.path.join(args.output_path, 'faces',
                         'speaker' + str(i + 1) + '.mp4'), fourcc, 25.0, dim)
        for frame in faces_dic[i]:
            speaker_video.write(
                cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        speaker_video.release()

    # Output video path
    parts = args.video_input_path.split('/')
    video_name = parts[-1][:-4]
    if not os.path.exists(os.path.join(args.output_path, 'filename_input')):
        os.mkdir(os.path.join(args.output_path, 'filename_input'))
    csvfile = open(
        os.path.join(args.output_path, 'filename_input',
                     str(video_name) + '.csv'), 'w')
    for i in range(args.number_of_speakers):
        csvfile.write('speaker' + str(i + 1) + ',0\n')
    csvfile.close()
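
Example #1 calls two project-local helpers, face2head and bb_intersection_over_union, that the snippet does not include. Below is a minimal sketch of plausible implementations, assuming the standard IoU formula and a simple centre-preserving box expansion; the real project code may differ.

def bb_intersection_over_union(boxA, boxB):
    # Standard intersection-over-union of two [x1, y1, x2, y2] boxes.
    xA, yA = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    xB, yB = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    inter = max(0, xB - xA) * max(0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter / float(areaA + areaB - inter)


def face2head(boxes, scale=1.5):
    # Enlarge each detected face box around its centre so the crop covers the
    # whole head (assumed behaviour of the missing helper).
    head_boxes = []
    for (x1, y1, x2, y2) in boxes:
        w, h = x2 - x1, y2 - y1
        cx, cy = x1 + w / 2, y1 + h / 2
        head_boxes.append([cx - w * scale / 2, cy - h * scale / 2,
                           cx + w * scale / 2, cy + h * scale / 2])
    return head_boxes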
Example #2
    def __init__(self, model_path="grid", gpu=-1):

        if model_path == "grid":
            model_path = os.path.split(__file__)[0] + "/data/grid.dat"
        elif model_path == "timit":
            model_path = os.path.split(__file__)[0] + "/data/timit.dat"
        elif model_path == "crema":
            model_path = os.path.split(__file__)[0] + "/data/crema.dat"

        if gpu < 0:
            self.device = torch.device("cpu")
            model_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
            self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device="cpu", flip_input=False)
        else:
            self.device = torch.device("cuda:" + str(gpu))
            model_dict = torch.load(model_path, map_location=lambda storage, loc: storage.cuda(gpu))
            self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device="cuda:" + str(gpu),
                                                   flip_input=False)

        self.stablePntsIDs = [33, 36, 39, 42, 45]
        self.mean_face = model_dict["mean_face"]
        self.img_size = model_dict["img_size"]
        self.audio_rate = model_dict["audio_rate"]
        self.video_rate = model_dict["video_rate"]
        self.audio_feat_len = model_dict['audio_feat_len']
        self.audio_feat_samples = model_dict['audio_feat_samples']
        self.id_enc_dim = model_dict['id_enc_dim']
        self.rnn_gen_dim = model_dict['rnn_gen_dim']
        self.aud_enc_dim = model_dict['aud_enc_dim']
        self.aux_latent = model_dict['aux_latent']
        self.sequential_noise = model_dict['sequential_noise']

        self.img_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((self.img_size[0], self.img_size[1])),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        self.audio_transform = torchaudio.transforms.Scale()

        self.encoder = RNN(self.audio_feat_len, self.aud_enc_dim, self.rnn_gen_dim,
                           self.audio_rate, init_kernel=0.005, init_stride=0.001)
        self.encoder.to(self.device)
        self.encoder.load_state_dict(model_dict['encoder'])

        self.encoder_id = Encoder(self.id_enc_dim, self.img_size)
        self.encoder_id.to(self.device)
        self.encoder_id.load_state_dict(model_dict['encoder_id'])

        skip_channels = list(self.encoder_id.channels)
        skip_channels.reverse()

        self.generator = Generator(self.img_size, self.rnn_gen_dim, condition_size=self.id_enc_dim,
                                   num_gen_channels=self.encoder_id.channels[-1],
                                   skip_channels=skip_channels, aux_size=self.aux_latent,
                                   sequential_noise=self.sequential_noise)

        self.generator.to(self.device)
        self.generator.load_state_dict(model_dict['generator'])

        self.encoder.eval()
        self.encoder_id.eval()
        self.generator.eval()
Example #3
        if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'):
            log(f'{i}: {f}')
            img = imageio.imread(f)
            if img.ndim == 2:
                img = np.tile(img[..., None], [1, 1, 3])
            img = resize(img, (256, 256))[..., :3]
            avatars.append(img)

    log('load checkpoints..')

    generator, kp_detector = load_checkpoints(config_path=opt.config,
                                              checkpoint_path=opt.checkpoint,
                                              device=device)

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=True,
                                      device=device)

    # cap = cv2.VideoCapture(opt.cam)
    cap = VideoCaptureAsync(opt.cam)
    if not cap.isOpened():
        log("Cannot open camera. Try choosing another CAMID in './scripts/settings.sh'")
        exit()
    cap.start()

    ret, frame = cap.read()
    if not ret:
        log("Cannot read from camera")
        exit()
Example #4
    def __init__(self):
        import face_alignment
        self.model = face_alignment.FaceAlignment(
            face_alignment.LandmarksType._2D, flip_input=False)
import argparse
import os

import torch
import face_alignment
from matplotlib import pyplot as plt


parser = argparse.ArgumentParser()
parser.add_argument('--data-dir')
parser.add_argument('--output')

args = parser.parse_args()

path_to_mp4 = args.data_dir
K = 8
num_vid = 0
device = torch.device('cuda:0')
saves_dir = args.output
face_aligner = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device ='cuda:0')

if not os.path.isdir(saves_dir):
    os.mkdir(saves_dir)

def generate_landmarks(frames_list, face_aligner):
    frame_landmark_list = []
    fa = face_aligner
    
    for i in range(len(frames_list)):
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            dpi = 100
            fig = plt.figure(figsize=(input.shape[1]/dpi, input.shape[0]/dpi), dpi = dpi)
    def __init__(self):
        self.subjects = dict()
        self.bridge = CvBridge()
        self.__subject_bridge = SubjectListBridge()

        self.margin = rospy.get_param("~margin", 42)
        self.margin_eyes_height = rospy.get_param("~margin_eyes_height", 36)
        self.margin_eyes_width = rospy.get_param("~margin_eyes_width", 60)
        self.interpupillary_distance = rospy.get_param(
            "~interpupillary_distance", default=0.058)
        self.cropped_face_size = (rospy.get_param("~face_size_height", 224),
                                  rospy.get_param("~face_size_width", 224))

        self.rgb_frame_id = rospy.get_param("~rgb_frame_id", "/kinect2_link")
        self.rgb_frame_id_ros = rospy.get_param("~rgb_frame_id_ros",
                                                "/kinect2_nonrotated_link")

        self.model_points = None
        self.eye_image_size = (rospy.get_param("~eye_image_height", 36),
                               rospy.get_param("~eye_image_width", 60))

        self.tf_broadcaster = TransformBroadcaster()
        self.tf_listener = TransformListener()
        self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")

        self.use_previous_headpose_estimate = True
        self.last_rvec = {}
        self.last_tvec = {}
        self.pose_stabilizers = {}  # Introduce scalar stabilizers for pose.

        try:
            tqdm.write("Wait for camera message")
            cam_info = rospy.wait_for_message("/camera_info",
                                              CameraInfo,
                                              timeout=None)
            self.img_proc = PinholeCameraModel()
            # noinspection PyTypeChecker
            self.img_proc.fromCameraInfo(cam_info)
            if np.array_equal(
                    self.img_proc.intrinsicMatrix(),
                    np.matrix([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])):
                raise Exception(
                    'Camera matrix is zero-matrix. Did you calibrate '
                    'the camera and link it to the yaml file in the launch file?'
                )
            tqdm.write("Camera message received")
        except rospy.ROSException:
            raise Exception("Could not get camera info")

        # multiple person images publication
        self.subject_pub = rospy.Publisher("/subjects/images",
                                           MSG_SubjectImagesList,
                                           queue_size=1)
        # multiple person faces publication for visualisation
        self.subject_faces_pub = rospy.Publisher("/subjects/faces",
                                                 Image,
                                                 queue_size=1)

        self.model_points = self._get_full_model_points()

        self.sess_bb = None
        self.face_net = FaceDetector(device="cuda:0")

        self.color_sub = rospy.Subscriber("/image",
                                          Image,
                                          self.callback,
                                          buff_size=2**24,
                                          queue_size=1)

        self.facial_landmark_nn = face_alignment.FaceAlignment(
            landmarks_type=face_alignment.LandmarksType._2D,
            device="cuda:0",
            flip_input=False)
Example #7
                                img_tensor.shape[2])
                input.data.copy_(img_tensor)

                output = mission2_face(input)

                output = output.cpu().view(output.shape[0])
                cur_prob = output.data[0]
                if max_prob < cur_prob:
                    max_prob = cur_prob

        cur_result['prob'] = max_prob
        prediction_results.append(cur_result)

    # save result
    save_result_to_txt(
        prediction_results,
        os.path.join(_res_dir, 'mission2_%s.txt' % kResultFilePostfix))


if __name__ == "__main__":

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      enable_cuda=True,
                                      flip_input=True)

    do_mission_1(options.data_dir, options.result_dir, fa)
    do_mission_2(options.data_dir, options.result_dir, fa)

# ()()
# ('') HAANJU & YEOLJERRY
Example #8
    def __init__(self, **config):
        self.fa = face_alignment.FaceAlignment(
            face_alignment.LandmarksType._2D, flip_input=False, device='cpu')
Example #9
def parse_arg():
    parser = argparse.ArgumentParser(
        description='Script for face detection and landmark retrieval')

    parser.add_argument('--image-path', type=str, help='Path to image file')

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_arg()

    fa_model = face_alignment.FaceAlignment(
        landmarks_type=face_alignment.LandmarksType._2D)

    image_path = args.image_path
    image = read_image(image_path=image_path)
    # image = np.transpose(image, axes=(2, 1, 0))

    image_batch = np.concatenate([image[np.newaxis, :]] * 10, axis=0)
    image_batch = torch.tensor(image_batch)

    print(image_batch.size())

    res_facial_info = fa_model.get_landmarks(image_or_path=image)

    # res_facial_info = [res_facial_info[0][:17]]
    # res_facial_info = [res_facial_info[0][17:22]]
    # res_facial_info = [res_facial_info[0][22:27]]
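
The read_image helper used above is not shown in the snippet. A minimal sketch, assuming it simply loads the file as an RGB numpy array with scikit-image:

from skimage import io


def read_image(image_path):
    # Load an image file as an RGB numpy array.
    return io.imread(image_path)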
Example #10
NOSE_H = int(32 / 128 * img_size)
NOSE_W = int(40 / 128 * img_size)
MOUTH_H = int(32 / 128 * img_size)
MOUTH_W = int(48 / 128 * img_size)


def wait():
    while True:
        key = cv2.waitKeyEx(10)
        if key == ord('q'):
            break


tpgan = TPGAN(generator_weights=GENERATOR_WEIGHTS_FILE)
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2halfD,
                                  device='cpu',
                                  flip_input=False)

img = cv2.imread(IMG_PATH)

landmarks = fa.get_landmarks(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
assert landmarks is not None

points = landmarks[0]
reye = np.average(np.array((points[37], points[38], points[40], points[41])),
                  axis=0)
leye = np.average(np.array((points[43], points[44], points[46], points[47])),
                  axis=0)
mouth = np.average(np.array((points[51], points[57])), axis=0)
nose_tip = points[30]
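
The crops around these landmark centres are not shown in the fragment. A minimal sketch of how a patch of the sizes defined above could be cut out; the helper name and rounding are assumptions, not the original code:

def crop_patch(image, center, patch_h, patch_w):
    # Cut an axis-aligned patch of size (patch_h, patch_w) centred on a landmark point.
    cx, cy = int(round(center[0])), int(round(center[1]))
    y0 = max(cy - patch_h // 2, 0)
    x0 = max(cx - patch_w // 2, 0)
    return image[y0:y0 + patch_h, x0:x0 + patch_w]

# e.g. mouth_patch = crop_patch(img, mouth, MOUTH_H, MOUTH_W)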
# From: https://github.com/1adrianb/face-alignment

import face_alignment
from skimage import io
import numpy as np
import os
import time
from joblib import Parallel, delayed
import shutil

baseDir = "/home/socialvv/socialvv"

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D)


def delete_helper(cam, ID):
    delDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip', 'landmarks')
    outDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip',
                          'landmarksv2')

    if os.path.isdir(delDir):
        shutil.rmtree(delDir)

    if os.path.isdir(outDir):
        shutil.rmtree(outDir)


def parallel_generation(cam, ID):
    frameDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip', 'frames')
    boundingBoxFile = os.path.join(
        baseDir, f'ID{ID}', 'bounding-boxes',
Example #12
import face_alignment
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from skimage import io
import os

# Run the 3D face alignment on a test image, without CUDA.
#fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3, device='cuda:0', flip_input=True)
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  device='cuda:0',
                                  flip_input=True)
path = '../test/assets/yates/'
imgs = os.listdir(path)
print(imgs)

for img in imgs:
    input = io.imread(path + img)
    preds = fa.get_landmarks(input)[-1]
    print(preds)
    #TODO: Make this nice
    fig = plt.figure(figsize=plt.figaspect(1), frameon=False)
    #fig = plt.figure(frameon=False)
    ax = fig.add_subplot(1, 1, 1)
    #ax = plt.axes([0,0,1,1], frameon=False)
    ax.imshow(input)
    ax.axis('off')
    lwS = 1
    mS = 3
    ax.plot(preds[0:17, 0],
            preds[0:17, 1],
import face_alignment
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import io
import collections

# Optionally set detector and some additional detector parameters
face_detector = 'sfd'
face_detector_kwargs = {"filter_threshold": 0.8}

# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  device='cpu',
                                  flip_input=True,
                                  face_detector=face_detector,
                                  face_detector_kwargs=face_detector_kwargs)

try:
    input_img = io.imread('../test/assets/aflw-test.jpg')
except FileNotFoundError:
    input_img = io.imread('test/assets/aflw-test.jpg')

preds = fa.get_landmarks(input_img)[-1]

# 2D-Plot
plot_style = dict(marker='o', markersize=4, linestyle='-', lw=2)

pred_type = collections.namedtuple('prediction_type', ['slice', 'color'])
pred_types = {
    'face': pred_type(slice(0, 17), (0.682, 0.780, 0.909, 0.5)),
    'eyebrow1': pred_type(slice(17, 22), (1.0, 0.498, 0.055, 0.4)),
    def __init__(self, opt_parser, single_test=False):
        print('Run on device {}'.format(device))

        # for key in vars(opt_parser).keys():
        #     print(key, ':', vars(opt_parser)[key])
        self.opt_parser = opt_parser

        # model
        if (opt_parser.add_audio_in):
            self.G = ResUnetGenerator(input_nc=7,
                                      output_nc=3,
                                      num_downs=6,
                                      use_dropout=False)
        else:
            self.G = ResUnetGenerator(input_nc=6,
                                      output_nc=3,
                                      num_downs=6,
                                      use_dropout=False)

        if (opt_parser.load_G_name != ''):
            ckpt = torch.load(opt_parser.load_G_name)
            try:
                self.G.load_state_dict(ckpt['G'])
            except:
                tmp = nn.DataParallel(self.G)
                tmp.load_state_dict(ckpt['G'])
                self.G.load_state_dict(tmp.module.state_dict())
                del tmp

        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs in G mode!")
            self.G = nn.DataParallel(self.G)

        self.G.to(device)

        if (not single_test):
            # dataset
            if (opt_parser.use_vox_dataset == 'raw'):
                if (opt_parser.comb_fan_awing):
                    from dataset.image_translation.image_translation_dataset import \
                        image_translation_raw74_dataset as image_translation_dataset
                elif (opt_parser.add_audio_in):
                    from dataset.image_translation.image_translation_dataset import image_translation_raw98_with_audio_dataset as \
                        image_translation_dataset
                else:
                    from dataset.image_translation.image_translation_dataset import image_translation_raw98_dataset as \
                        image_translation_dataset
            else:
                from dataset.image_translation.image_translation_dataset import image_translation_preprocessed98_dataset as \
                    image_translation_dataset

            self.dataset = image_translation_dataset(
                num_frames=opt_parser.num_frames)
            self.dataloader = torch.utils.data.DataLoader(
                self.dataset,
                batch_size=opt_parser.batch_size,
                shuffle=True,
                num_workers=opt_parser.num_workers)

            # criterion
            self.criterionL1 = nn.L1Loss()
            self.criterionVGG = VGGLoss()
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(),
                      "GPUs in VGG model!")
                self.criterionVGG = nn.DataParallel(self.criterionVGG)
            self.criterionVGG.to(device)

            # optimizer
            self.optimizer = torch.optim.Adam(self.G.parameters(),
                                              lr=opt_parser.lr,
                                              betas=(0.5, 0.999))

            # writer
            if (opt_parser.write):
                self.writer = SummaryWriter(
                    log_dir=os.path.join(opt_parser.log_dir, opt_parser.name))
                self.count = 0

            # ===========================================================
            #       online landmark alignment : Awing
            # ===========================================================
            PRETRAINED_WEIGHTS = 'thirdparty/AdaptiveWingLoss/ckpt/WFLW_4HG.pth'
            GRAY_SCALE = False
            HG_BLOCKS = 4
            END_RELU = False
            NUM_LANDMARKS = 98

            self.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")
            model_ft = models.FAN(HG_BLOCKS, END_RELU, GRAY_SCALE,
                                  NUM_LANDMARKS)

            checkpoint = torch.load(PRETRAINED_WEIGHTS)
            if 'state_dict' not in checkpoint:
                model_ft.load_state_dict(checkpoint)
            else:
                pretrained_weights = checkpoint['state_dict']
                model_weights = model_ft.state_dict()
                pretrained_weights = {k: v for k, v in pretrained_weights.items() \
                                      if k in model_weights}
                model_weights.update(pretrained_weights)
                model_ft.load_state_dict(model_weights)
            print('Loaded AWing model successfully')
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(),
                      "GPUs for AWing!")
                self.fa_model = nn.DataParallel(model_ft).to(
                    self.device).eval()
            else:
                self.fa_model = model_ft.to(self.device).eval()

            # ===========================================================
            #       online landmark alignment : FAN
            # ===========================================================
            if (opt_parser.comb_fan_awing):
                if (opt_parser.fan_2or3D == '2D'):
                    self.predictor = face_alignment.FaceAlignment(
                        face_alignment.LandmarksType._2D,
                        device='cuda' if torch.cuda.is_available() else "cpu",
                        flip_input=True)
                else:
                    self.predictor = face_alignment.FaceAlignment(
                        face_alignment.LandmarksType._3D,
                        device='cuda' if torch.cuda.is_available() else "cpu",
                        flip_input=True)
            # return the aligned face
            return output, True, scale, angle, dist
        else:
            return image, False, 0, 0, 0


''' Main part of the code '''

#base_dir_ori = 'D:/Master/Master Data Science/2 Cuatrimestre/Master Thesis/Database/IEMOCAP_cropped/'
#base_dir_des = 'D:/Master/Master Data Science/2 Cuatrimestre/Master Thesis/Database/IEMOCAP_faces_2/'
base_dir_ori = r'/app/data/IEMOCAP_dat1/IEMOCAP_cropped_2/'
base_dir_des = r'/app/data/IEMOCAP_dat1/IEMOCAP_faces_final_2/'
base_dir_des_list = r'/app/data/IEMOCAP_dat1/IEMOCAP_error_final_frames/'

predictor = face_alignment.FaceAlignment(
    face_alignment.LandmarksType._2D, device='cuda',
    flip_input=False)  # use device='cpu' to run without a GPU
fa = FaceAligner(predictor, desiredFaceWidth=224)

for i in range(0, 2):
    print('SESSION ' + str(i + 1) + ':\n')
    ses_dir_ori = base_dir_ori + r'Session' + str(i + 1) + r'/'
    ses_dir_des = base_dir_des + r'Session' + str(i + 1) + r'/'
    ses_dir_des_list = base_dir_des_list + r'Session' + str(i + 1) + r'/'

    videos = os.listdir(ses_dir_ori)

    for video in videos:
        print(video)

        video_folder = ses_dir_des + video[:-4] + '/'
Example #16
parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
parser.add_argument('--reg_lr', type=float, default=1e-6, help='weight decay')
parser.add_argument('--write', default=False, action='store_true')
parser.add_argument('--segment_batch_size',
                    type=int,
                    default=1,
                    help='batch size')
parser.add_argument('--emb_coef', default=3.0, type=float)
parser.add_argument('--lambda_laplacian_smooth_loss', default=1.0, type=float)
parser.add_argument('--use_11spk_only', default=False, action='store_true')

opt_parser = parser.parse_args()
''' STEP 1: preprocess input single image '''
img = cv2.imread(opt_parser.jpg)
predictor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                         device='cuda',
                                         flip_input=True)
shapes = predictor.get_landmarks(img)
if (not shapes or len(shapes) != 1):
    print('Cannot detect face landmarks. Exit.')
    exit(-1)
shape_3d = shapes[0]
shape_3d = np.concatenate([shape_3d, np.ones(shape=(68, 1))], axis=1)

# # close mouth
# shape_3d = shape_3d.reshape((1, 68, 3))
# index1 = list(range(60-1, 55-1, -1))
# index2 = list(range(68-1, 65-1, -1))
# mean_out = 0.5 * (shape_3d[:, 49:54] + shape_3d[:, index1])
# mean_in = 0.5 * (shape_3d[:, 61:64] + shape_3d[:, index2])
# shape_3d[:, 50:53] -= (shape_3d[:, 61:64] - mean_in) * 0.7
Example #17
    def test_predict_points(self):
        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, enable_cuda=False)
        fa.get_landmarks('test/assets/aflw-test.jpg')
Example #18
#!/usr/bin/env python3

import os
import time
import datetime
import cv2
import numpy as np
from PIL import Image
from PIL.ExifTags import TAGS
import face_alignment
import sys

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                  device='cpu')


def extract_face(img):
    faces = fa.get_landmarks_from_image(img)

    if faces is None or len(faces) == 0:
        return None

    return faces[0]


def master_align(img, face):
    A = np.zeros((2 * face.shape[0], 4))
    A[0::2, 0] = face[:, 0]
    A[0::2, 1] = face[:, 1]
    A[0::2, 2] = 1
    A[1::2, 0] = face[:, 1]
import face_alignment
import numpy as np
from matplotlib import pyplot as plt


def generate_landmarks(frames_list):
    frame_landmark_list = []
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False,
                                      device='cuda:0')

    for i in range(len(frames_list)):
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            dpi = 100
            fig = plt.figure(figsize=(input.shape[1] / dpi,
                                      input.shape[0] / dpi),
                             dpi=dpi)
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(np.ones(input.shape))
            plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

            #chin
            ax.plot(preds[0:17, 0],
                    preds[0:17, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='green',
                    lw=2)
            #left and right eyebrow
            ax.plot(preds[17:22, 0],
                    preds[17:22, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='orange',
                    lw=2)
            ax.plot(preds[22:27, 0],
                    preds[22:27, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='orange',
                    lw=2)
            #nose
            ax.plot(preds[27:31, 0],
                    preds[27:31, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='blue',
                    lw=2)
            ax.plot(preds[31:36, 0],
                    preds[31:36, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='blue',
                    lw=2)
            #left and right eye
            ax.plot(preds[36:42, 0],
                    preds[36:42, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='red',
                    lw=2)
            ax.plot(preds[42:48, 0],
                    preds[42:48, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='red',
                    lw=2)
            #outer and inner lip
            ax.plot(preds[48:60, 0],
                    preds[48:60, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='purple',
                    lw=2)
            ax.plot(preds[60:68, 0],
                    preds[60:68, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='pink',
                    lw=2)
            ax.axis('off')

            fig.canvas.draw()

            data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
            data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))

            frame_landmark_list.append((input, data))
            plt.close(fig)
        except:
            print('Error: Video corrupted or no landmarks visible')

    for i in range(len(frames_list) - len(frame_landmark_list)):
        #filling frame_landmark_list in case of error
        frame_landmark_list.append(frame_landmark_list[i])

    return frame_landmark_list
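
A small usage sketch for generate_landmarks above; the video path and the cv2-based frame extraction are assumptions, not part of the original snippet:

import cv2


def read_frames(video_path, max_frames=32):
    # Decode up to max_frames frames and convert them from BGR to RGB.
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames

# frames = read_frames('example.mp4')   # hypothetical input video
# pairs = generate_landmarks(frames)    # list of (frame, landmark image) tuples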
import face_alignment
from skimage import io

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                  flip_input=False)

input = io.imread(
    'examples/Pictures2020/50_org_ref_Europaerinnen/98-04358_CF-0-1dummy0098_AGE_30_GLASSES_false_ETHNICITY_caucasian_TEINT_1.jpg'
)
preds = fa.get_landmarks(input)
print(preds)
def main(txtfile,
         out_dir,
         models_root,
         min_size=120,
         frame_sample_ratio=0.1,
         min_samples=5,
         sample_limit=100,
         min_res=720,
         id_limit=7,
         cudev=0):

    # vid_paths = glob(os.path.join(in_dir, '*.mp4'))  <-- in_dir replaced with vid_paths
    os.environ["CUDA_VISIBLE_DEVICES"] = str(
        cudev)  #  <-- added in order to use multiple gpus to process IJB-C

    vid_paths = []
    with open(txtfile) as a:
        for line in a:
            vid_paths.append(line.rstrip())

    # INITIALIZE MODELS
    device, gpus = utils.set_device()

    # Initialize detection and landmarks extraction
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False)

    # Initialize eulers angles model
    Gp = hopenet.Hopenet()
    Gp.to(device)
    path2w = "{}/hopenet_robust_alpha1.pkl".format(models_root)
    weights = torch.load(path2w)
    Gp.load_state_dict(weights)
    Gp.eval()

    # Initialize verification model
    verificator = verification.ImageToFeatures(models_root, device, fa)

    # For each video file
    for vid_path in sorted(vid_paths):
        vid_name = os.path.splitext(os.path.basename(vid_path))[0]
        curr_out_dir = os.path.join(out_dir, vid_name)

        if os.path.exists(curr_out_dir):
            print('Skipping "%s"' % vid_name)
            continue
        else:
            print('Processing "%s"...' % vid_name)
            # os.mkdir(curr_out_dir)

        # Process video
        try:
            video_landmark_keyframes.main(vid_path, curr_out_dir, fa, Gp,
                                          verificator, device, min_size,
                                          frame_sample_ratio, min_samples,
                                          sample_limit, min_res, id_limit)
        except Exception as e:
            logging.error(traceback.format_exc())


if __name__ == '__main__':
    # Parse program arguments
    import argparse
    parser = argparse.ArgumentParser('video_landmarks_keyframes_batch')
    parser.add_argument('input', metavar='DIR', help='input directory')
    parser.add_argument('-m',
                        '--models_root',
                        metavar='DIR',
                        help='model root directory')
    parser.add_argument('-o',
                        '--output',
                        metavar='DIR',
                        help='output directory')
    parser.add_argument('-mb',
                        '--min_bbox_size',
                        default=200,
                        type=int,
                        metavar='N',
                        help='minimum bounding box size')
    parser.add_argument('-fs',
                        '--frame_samples',
                        default=0.1,
                        type=float,
                        metavar='F',
                        help='the number of samples per video')
    parser.add_argument('-ms',
                        '--min_samples',
                        default=5,
                        type=int,
                        metavar='N',
                        help='the limit on the number of samples')
    parser.add_argument('-sl',
                        '--sample_limit',
                        default=100,
                        type=int,
                        metavar='N',
                        help='the limit on the number of samples')
    parser.add_argument('-mr',
                        '--min_res',
                        default=720,
                        type=int,
                        metavar='N',
                        help='minimum video resolution (height pixels)')
    parser.add_argument('-il',
                        '--id_limit',
                        default=7,
                        type=int,
                        metavar='N',
                        help='the limit on the number of identities')
    args = parser.parse_args()
    main(args.input, args.output, args.models_root, args.min_bbox_size,
         args.frame_samples, args.min_samples, args.sample_limit, args.min_res,
         args.id_limit)
Example #22
    
    converter = pylon.ImageFormatConverter()
    
    # converting to opencv bgr format
    converter.OutputPixelFormat = pylon.PixelType_BGR8packed
    converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned
    
    
    # Print the model name of the camera.
    print("Using device ", camera.GetDeviceInfo().GetModelName())

    # Start the grabbing of c_countOfImagesToGrab images.
    # The camera device is parameterized with a default configuration which
    # sets up free-running continuous acquisition.
    camera.StartGrabbingMax(10000, pylon.GrabStrategy_LatestImageOnly)
    fa = fa.FaceAlignment(fa.LandmarksType._2D, face_detector='sfd')
    while camera.IsGrabbing():
        # Wait for an image and then retrieve it. A timeout of 5000 ms is used.
        grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)

        # Image grabbed successfully?
        if grabResult.GrabSucceeded():
            image = converter.Convert(grabResult)
            img = image.GetArray()

            masked_face, rgb_vals = gr.get_roi(img, fa)
            imageWindow.SetImage(grabResult)
            imageWindow.Show()
        else:
            print("Error: ",
                  grabResult.ErrorCode)  # grabResult.ErrorDescription does not work properly in python could throw UnicodeDecodeError
import face_alignment
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from skimage import io

# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  enable_cuda=False,
                                  flip_input=False)

input = io.imread('../test/assets/aflw-test.jpg')
preds = fa.get_landmarks(input)[-1]

#TODO: Make this nice
fig = plt.figure(figsize=plt.figaspect(.5))
ax = fig.add_subplot(1, 2, 1)
ax.imshow(input)
ax.plot(preds[0:17, 0],
        preds[0:17, 1],
        marker='o',
        markersize=6,
        linestyle='-',
        color='w',
        lw=2)
ax.plot(preds[17:22, 0],
        preds[17:22, 1],
        marker='o',
        markersize=6,
        linestyle='-',
        color='w',
Example #24
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    '''
    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()
    '''
    face_regressor = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D, flip_input=False)
    # face_detector = face_regressor.face_detector

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        '''
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)
        '''
        img_rgb = img_ori[:, :, ::-1]
        ptss = face_regressor.get_landmarks(img_rgb)
        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for pts in ptss:
            # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
            if args.dlib_landmark:
                # - use landmark for cropping
                # pts = face_regressor(img_ori, rect).parts()
                # pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts.T)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
            if args.dump_ply:
                dump_to_ply(
                    vertices, tri,
                    '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(
                    vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''),
                                                 ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''),
                                                 ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''),
                                                   ind)
                paf_feature = gen_img_paf(img_crop=img,
                                          param=param,
                                          kernel_size=args.paf_size)

                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst,
                                           tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst,
                                 tri - 1)  # cython version
            cv2.imwrite(
                wfp,
                pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            draw_landmarks(img_ori,
                           pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=args.show_flg)
Example #25
import face_alignment
from skimage import io
import pandas
import numpy
import os
import sys

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  device='cpu',
                                  flip_input=True)
fa2 = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                   device='cpu',
                                   flip_input=True)

# Convert to csv
# for filename in os.listdir("dataset"):
#     if filename.endswith(".jpg"):
#         print(filename)
#         database = io.imread("dataset/" + filename)
#         preds = fa2.get_landmarks(database)[-1]
#         prediction = pandas.DataFrame(preds)
#         cvs_hasil = prediction.to_csv("csvHasil2d/" + filename + ".csv", index=False)

for filename in os.listdir("Input"):
    ctrTrue = 0
    if filename.endswith(".jpg"):
        print("now testing " + filename)
        input_img = io.imread('Input/' + filename)
        inp = fa.get_landmarks(input_img)[-1]
    for csvname in os.listdir("csvHasil"):
        if csvname.endswith(".csv"):
Example #26
def run(dir_faceA, dir_faceB):
    dir_bm_faceA_eyes = f"{dir_faceA}/binary_masks_eyes2"
    dir_bm_faceB_eyes = f"{dir_faceB}/binary_masks_eyes2"

    fns_faceA = glob(f"{dir_faceA}/raw_faces/*.*")
    fns_faceB = glob(f"{dir_faceB}/raw_faces/*.*")

    print('faces', dir_faceA, dir_faceB, dir_bm_faceA_eyes, dir_bm_faceB_eyes)

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False)
    # !mkdir -p binary_masks/faceA_eyes
    Path(f"{dir_bm_faceA_eyes}").mkdir(parents=True, exist_ok=True)
    # !mkdir -p binary_masks/faceB_eyes
    Path(f"{dir_bm_faceB_eyes}").mkdir(parents=True, exist_ok=True)

    fns_face_not_detected = []

    for idx, fns in enumerate([fns_faceA, fns_faceB]):
        if idx == 0:
            save_path = dir_bm_faceA_eyes
        elif idx == 1:
            save_path = dir_bm_faceB_eyes

        # create binary mask for each training image
        for fn in fns:
            raw_fn = PurePath(fn).parts[-1]

            x = plt.imread(fn)
            x = cv2.resize(x, (256, 256))
            preds = fa.get_landmarks(x)

            if preds is not None:
                preds = preds[0]
                mask = np.zeros_like(x)

                # Draw right eye binary mask
                pnts_right = [(preds[i, 0], preds[i, 1])
                              for i in range(36, 42)]
                hull = cv2.convexHull(np.array(pnts_right)).astype(np.int32)
                mask = cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)

                # Draw left eye binary mask
                pnts_left = [(preds[i, 0], preds[i, 1]) for i in range(42, 48)]
                hull = cv2.convexHull(np.array(pnts_left)).astype(np.int32)
                mask = cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)

                # Draw mouth binary mask
                #pnts_mouth = [(preds[i,0],preds[i,1]) for i in range(48,60)]
                #hull = cv2.convexHull(np.array(pnts_mouth)).astype(np.int32)
                #mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)

                mask = cv2.dilate(mask,
                                  np.ones((13, 13), np.uint8),
                                  iterations=1)
                mask = cv2.GaussianBlur(mask, (7, 7), 0)

            else:
                mask = np.zeros_like(x)
                print(f"No faces were detected in image '{fn}''")
                fns_face_not_detected.append(fn)

            plt.imsave(fname=f"{save_path}/{raw_fn}", arr=mask, format="jpg")
    num_faceA = len(glob(f"{dir_faceA}/*.*"))
    num_faceB = len(glob(f"{dir_faceB}/*.*"))

    print("Nuber of processed images: " + str(num_faceA + num_faceB))
    print("Number of image(s) with no face detected: " +
          str(len(fns_face_not_detected)))
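
A minimal invocation sketch for run() above; the directory names are placeholders, and each directory is expected to contain a raw_faces/ subfolder with the training images, as the globs in run() suggest:

if __name__ == "__main__":
    run("./data/faceA", "./data/faceB")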
Example #27
from torch import multiprocessing

import time  

def parse_args():
    parser = argparse.ArgumentParser()

   
    parser.add_argument('-i', '--in_file', type=str, default='56')

    return parser.parse_args()


config = parse_args()



fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False)  # optionally: device='cpu'


detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('./basics/shape_predictor_68_face_landmarks.dat')


def unzip_video(path):
    
    tar_files = os.listdir(path)
    valid = []
    for f in tar_files:
        if 'mpg' in f:
            valid.append(f)
    for f in valid:
        f_path = os.path.join(path, f)
Example #28
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(image_size=160,
              margin=0,
              min_face_size=20,
              thresholds=[0.6, 0.7, 0.7],
              factor=0.709,
              post_process=True,
              device=device)

resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
#################################################################################
######### Face alignment predictor
face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                             face_detector='sfd',
                             device='cpu')

classifier = None
model_path = "/home/brance/Mine/Prototypes/Face Ui/trained_faces_model.clf"

# Loading model
try:
    with open(model_path, 'rb') as f:
        classifier = pickle.load(f)
        print("Loaded classifier ...................")

except Exception as e:
    print("[ERROR]:>>> Could Not Load FaceSickNess Classifier ! \n", e)

def generate_landmarks(cap, device, pad):
    """Input: cap a cv2.VideoCapture object, device the torch.device, 
    pad the distance in pixel from border to face
    output: x the camera output, g_y the corresponding landmark"""

    #Get video image
    frame_landmark_list = []
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False,
                                      device='cuda:0')
    i = 0

    if (cap.isOpened()):
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("broke at if not ret", ret)
            return None, None  # Can't receive frame. Possibly due to stream end
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames_list = [rgb]

        #Create landmark for face
        frame_landmark_list = []
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            input = crop_and_reshape_img(input, preds, pad=pad)
            preds = crop_and_reshape_preds(preds, pad=pad)

            dpi = 100
            fig = plt.figure(figsize=(256 / dpi, 256 / dpi), dpi=dpi)
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(np.ones(input.shape))
            plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

            #chin
            ax.plot(preds[0:17, 0],
                    preds[0:17, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='green',
                    lw=2)
            #left and right eyebrow
            ax.plot(preds[17:22, 0],
                    preds[17:22, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='orange',
                    lw=2)
            ax.plot(preds[22:27, 0],
                    preds[22:27, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='orange',
                    lw=2)
            #nose
            ax.plot(preds[27:31, 0],
                    preds[27:31, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='blue',
                    lw=2)
            ax.plot(preds[31:36, 0],
                    preds[31:36, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='blue',
                    lw=2)
            #left and right eye
            ax.plot(preds[36:42, 0],
                    preds[36:42, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='red',
                    lw=2)
            ax.plot(preds[42:48, 0],
                    preds[42:48, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='red',
                    lw=2)
            #outer and inner lip
            ax.plot(preds[48:60, 0],
                    preds[48:60, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='purple',
                    lw=2)
            ax.plot(preds[60:68, 0],
                    preds[60:68, 1],
                    marker='',
                    markersize=5,
                    linestyle='-',
                    color='pink',
                    lw=2)
            ax.axis('off')

            fig.canvas.draw()

            data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
            data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))

            frame_landmark_list.append((input, data))
            plt.close(fig)
            no_pic = False
        except:
            print('Error: Video corrupted or no landmarks visible')

    frame_mark = torch.from_numpy(np.array(frame_landmark_list)).type(
        dtype=torch.float)  #K,2,256,256,3
    if frame_mark.shape[0] == 0:
        print("broke at frame_mark.shape", frame_mark.shape, frame_mark)
        return None, None
    frame_mark = frame_mark.transpose(2, 4).to(device)  #K,2,3,256,256

    x = frame_mark[0, 0].to(device)
    g_y = frame_mark[0, 1].to(device)

    return x, g_y
Example #30
def main():
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False,
                                      device='cpu')
    ms = MS()

    def get_lms5_2d_from_2dFan(img):
        # Run 2D-FAN (face_alignment) and reduce the 68 landmarks to a
        # 5-point layout: two eye centres, nose tip, two mouth corners.
        if any([x < 32 for x in img.shape[:2]]):
            return None  # crop too small for the detector
        preds = fa.get_landmarks(img)
        if preds is None:
            return None
        Lm2D = preds[0]
        # 1-based 68-point indices: nose tip, eye corners, mouth corners.
        lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1

        l_eye = np.mean(Lm2D[lm_idx[[1, 2]]], 0)
        r_eye = np.mean(Lm2D[lm_idx[[3, 4]]], 0)
        nose = Lm2D[lm_idx[0]]
        l_mouth = Lm2D[lm_idx[5]]
        r_mouth = Lm2D[lm_idx[6]]

        lms5 = np.stack([l_eye, r_eye, nose, l_mouth, r_mouth], axis=0)
        lms5 = np.round(lms5).astype(int)
        return lms5

    def draw_vertices(mesh_vertices,
                      image,
                      vertice_color=[255, 255, 255],
                      vertice_r=1,
                      extra_scale=3):
        # Upscale the image by extra_scale and paint each vertex (scaled by
        # the same factor) as a single pixel or a filled circle.
        mesh_vertices *= extra_scale

        h, w, _ = image.shape
        w *= extra_scale
        h *= extra_scale
        image = cv2.resize(image, (w, h))

        # Both drawing helpers read x and y from the loop below via closure.
        def draw_r_1():
            image[y, x] = vertice_color

        def draw_r():
            cv2.circle(image, (x, y), vertice_r, vertice_color, thickness=-1)

        if vertice_r == 1:
            draw_func = draw_r_1
        else:
            draw_func = draw_r

        for vertex in mesh_vertices:
            x, y = [np.round(v).astype(int) for v in vertex[:2]]
            if 0 <= x < w and 0 <= y < h:
                draw_func()
        return image

    mp = mappings.GetWorkingDir()
    file = os.path.join(
        mp(**{})[mp.PIPE_DEFAULT.KEY_OUT_working_dir], 'SVO-03-01-detect.json')
    assert os.path.isfile(file)
    with open(file) as f:
        data = json.load(f)
    frame_img_file = os.path.join(mp.PIPE_DEFAULT.KEY_OUT_working_dir,
                                  'tmp_sb/0001.jpg')
    assert os.path.isfile(frame_img_file)
    frame_img = cv2.imread(frame_img_file)
    # cv2.imread returns BGR; swap channels so matplotlib shows true colors.
    frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)

    vis_scale = 4
    h, w, _ = frame_img.shape
    w *= vis_scale
    h *= vis_scale
    frame_img_vis = cv2.resize(frame_img, (w, h))

    keypoints_ref = dict(
        nose=0,
        leye=15,
        reye=16,
        lear=17,
        rear=18,
        chest=1,
        lshoulder=2,
        rshoulder=5,
    )
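    # keypoints_ref picks the upper-body keypoints used below for the
    # head/torso box; the values index into the decoded 'Bones' point list.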

    for frame in data['FrameSequences'][0]['Frames']:
        for detected_object in frame['DetectedObjects']:
            if 'Bones' in detected_object:
                # 'Bones' is an x/y-interleaved list; (-1, -1) marks a
                # keypoint that was not detected.
                bones = detected_object['Bones']
                points = []
                for x, y in zip(bones[::2], bones[1::2]):
                    if not (x == -1 and y == -1):
                        assert x >= 0 and y >= 0, "{}, {}".format(x, y)
                    points.append([x, y])

                points = np.array(points)
                points = points[list(keypoints_ref.values())]
                # Keep only the keypoints that were actually detected.
                points = np.array(
                    list(filter(lambda p: (p[0] >= 0) and (p[1] >= 0),
                                points)))
                if len(points) == 0: continue

                # Heuristic head/torso box: keypoint centre +/- 3*std in x and
                # 4.5*std in y; skip boxes with a degenerate aspect ratio.
                stds = 3 * np.array(
                    [np.std(points[:, 0]), 1.5 * np.std(points[:, 1])])
                if not 0.1 < stds[0] / stds[1] < 10: continue
                center = np.array(np.mean(points, axis=0))
                assert center.shape == stds.shape
                bbox = [
                    center[0] - stds[0], center[1] - stds[1],
                    center[0] + stds[0], center[1] + stds[1]
                ]
                bbox = np.round(bbox).astype(int)

                #
                # try:
                #     assert all([x>=0 for x in nose])
                #     assert all([x>=0 for x in chest])
                # except Exception:
                #     continue

                # bbox_h = abs(nose[1]-chest[1])/2
                # frame_img = cv2.rectangle(frame_img, (int(round(nose[0]-bbox_h)), int(round(nose[1]-bbox_h))),
                #               (int(round(nose[0]+bbox_h)), int(round(nose[1]+bbox_h))),
                #               color=(0,255,255), thickness=2)

                # frame_img = cv2.rectangle(frame_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=(0,255,255), thickness=2)
                crop_from_openpose = frame_img[bbox[1]:bbox[3],
                                               bbox[0]:bbox[2], :]
                # plt.imshow(frame_img)
                # plt.show()

                # plt.imshow(crop_from_openpose)
                # plt.title('crop_from_openpose')
                # plt.show()
                lms2d_n5 = get_lms5_2d_from_2dFan(deepcopy(crop_from_openpose))
                if lms2d_n5 is None:
                    continue

                vis_crop = deepcopy(crop_from_openpose)
                for p in lms2d_n5:
                    vis_crop = cv2.circle(vis_crop,
                                          tuple([int(x) for x in p]),
                                          radius=1,
                                          color=(0, 255, 0),
                                          thickness=-1)
                # plt.imshow(vis_crop)
                # plt.title('crop_from_openpose with lms2d_n5')
                # plt.show()

                # Add a batch dimension and query the depth-map model; its
                # output dict provides a crop, mesh, scale s and translation t.
                img = np.expand_dims(deepcopy(crop_from_openpose), axis=0)
                lms2d_n5 = np.expand_dims(lms2d_n5, axis=0)
                out = ms.get_depthmap(img, lms2d_n5, False)
                if any([x == 0 for x in out['crop'].numpy().shape]):
                    continue  # empty crop returned; skip this detection
                plt.imshow(out['crop'].numpy().astype(int))
                plt.title('ms_crop')
                plt.show()
                mesh = out['mesh'].numpy()[0]
                # vis = draw_vertices(mesh, out['crop'].numpy())
                # vis = vis.astype(int)
                # for p in lms2d_n5:
                #     vis = cv2.circle(vis, tuple([int(x) for x in p]), radius=1, color=(0,255,0), thickness=-1)
                # plt.imshow(vis)
                # plt.title('ms_crop')
                # plt.show()
                # print()

                # Map the mesh from the model's crop space back into
                # crop_from_openpose pixel coordinates: scale the normalised
                # translation t by the crop size, undo the model scale s,
                # then shift the x/y coordinates by t.
                t = out['t'].numpy()
                t[0] *= crop_from_openpose.shape[1]
                t[1] *= crop_from_openpose.shape[0]
                # t = np.round(t).astype(int)
                mesh = out['mesh'].numpy()[0]
                mesh *= 1 / out['s'].numpy()
                mesh[:, 0] += t[0]
                mesh[:, 1] += t[1]
                # vis = draw_vertices(deepcopy(mesh), deepcopy(crop_from_openpose), extra_scale=6)
                # vis = vis.astype(int)
                # plt.imshow(vis)
                # plt.title('openpose_crop')
                # plt.show()
                # print()

                # ms_crop = np.round(out['crop'].numpy()).astype(int)
                # new_shape = np.round(1/out['s'].numpy()*np.array(ms_crop.shape[:2])).astype(int)
                # a = np.round(tf.image.resize(ms_crop, new_shape[:2],).numpy()).astype(int)
                # t = np.round(t).astype(int)
                # crop_from_openpose[t[1]:t[1]+new_shape[0], t[0]:t[0]+new_shape[1], :] = a
                # plt.imshow(crop_from_openpose)
                # plt.title('ms crop insertion')
                # plt.show()
                # print()

                # Shift into full-frame coordinates and apply the display scale.
                mesh[:, 0] += bbox[0]
                mesh[:, 1] += bbox[1]
                mesh *= vis_scale
                frame_img_vis = draw_vertices(deepcopy(mesh),
                                              deepcopy(frame_img_vis),
                                              extra_scale=1)
                frame_img_vis = frame_img_vis.astype(int)
                plt.imshow(frame_img_vis)
                plt.title('main frame')
                plt.show()
                print()
            # break
        break
    print()
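The coordinate bookkeeping near the end of the loop (undoing the model scale s, applying the crop-normalised translation t, then offsetting by the OpenPose box and the visualisation scale) is the easiest part to get wrong. Below is a minimal standalone sketch of the same mapping; the helper name and argument layout are assumptions for illustration, not part of the pipeline above.

import numpy as np

def mesh_to_frame_coords(mesh, s, t_norm, crop_shape, bbox, vis_scale=1):
    # Hypothetical helper mirroring the inline transform above.
    # mesh: (N, 3) vertices in the model's crop space
    # s: scalar scale returned by the model
    # t_norm: (2,) translation normalised to the crop size
    # crop_shape: (h, w) of the crop fed to the model
    # bbox: [x1, y1, x2, y2] of that crop inside the full frame
    mesh = mesh / s                            # undo the model's scale
    t = (t_norm[0] * crop_shape[1],            # x offset in crop pixels
         t_norm[1] * crop_shape[0])            # y offset in crop pixels
    mesh[:, 0] += t[0] + bbox[0]               # crop offset + frame offset
    mesh[:, 1] += t[1] + bbox[1]
    return mesh * vis_scale                    # optional display scaling

This corresponds to the inline sequence mesh *= 1/s; mesh[:, 0] += t[0]; mesh[:, 1] += t[1]; mesh[:, 0] += bbox[0]; mesh[:, 1] += bbox[1]; mesh *= vis_scale executed just before draw_vertices.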