Exemplo n.º 1
0
    def __getitem__(self, item):
        """Return an (input, target) pair built from randomly sampled frames.

        The landmark file ``self.fls_filenames[item]`` is loaded with
        ``np.loadtxt``; column 0 of each row is used as a frame index and the
        remaining columns are reshaped to ``(68, 3)`` landmarks.  The matching
        video path is derived from the landmark file name (raw VOX layout).

        Up to ``self.num_random_frames`` row indices are sampled without
        replacement from ``range(fls.shape[0] - 2)`` and visited in ascending
        order.  For each one a 224 x 224 landmark image is produced by
        ``vis_landmark_on_img``, concatenated channel-wise with the video
        frame and resized to 256 x 256.

        Returns:
            tuple: ``(image_in, image_out)`` as float32 arrays scaled by
            1/255, with axes 1 and 3 swapped (channels first).
            ``image_in`` holds, for every sampled frame but the last, its
            landmark image (3 channels) followed by the video frame of the
            *next* sampled frame (3 channels).  ``image_out`` holds the video
            frame of the same sampled frame (3 channels).

        Raises:
            ValueError: if a sampled frame cannot be read from the video, or
                if no frames were sampled (``np.stack`` on an empty list).
        """
        fls_filename = self.fls_filenames[item]

        # load landmark file
        fls = np.loadtxt(os.path.join(self.src_dir, fls_filename))

        # ================= raw VOX version ================================
        # File name minus its last 4 characters is split on '_x_'; the video
        # lives at <mp4_dir>/<id>/<vname>/<vid>.mp4.
        mp4_filename = fls_filename[:-4].split('_x_')
        mp4_id = mp4_filename[0].split('_')[-1]
        mp4_vname = mp4_filename[1]
        mp4_vid = mp4_filename[2][:-3]
        video_dir = os.path.join(self.mp4_dir, mp4_id,
                                 mp4_vname, mp4_vid + '.mp4')
        # ======================================================================

        video = cv2.VideoCapture(video_dir)
        try:
            if not video.isOpened():
                # NOTE(review): exit(0) ends the whole process with a success
                # status; consider raising an exception instead.
                print('Unable to open video file')
                exit(0)

            # skip first several frames due to landmark extraction;
            # grab() advances the stream without returning the image
            start_idx = int(fls[0, 0])
            for _ in range(start_idx):
                video.grab()

            random_frame_indices = np.random.permutation(
                fls.shape[0]-2)[0:self.num_random_frames]
            # a set gives O(1) membership tests inside the frame loop
            selected = set(random_frame_indices.tolist())
            # nothing after the last sampled index is needed
            num_to_scan = max(selected) + 1 if selected else 0

            # save video and landmark in parallel
            frames = []
            for j in range(num_to_scan):
                if j not in selected:
                    video.grab()
                    continue

                ret, img_video = video.read()
                if not ret:
                    raise ValueError(
                        'Unable to read frame %d from %s' % (j, video_dir))

                img_fl = np.ones(shape=(224, 224, 3)) * 255
                fl = fls[j, 1:].astype(int)
                img_fl = vis_landmark_on_img(img_fl, np.reshape(fl, (68, 3)))

                frame = np.concatenate((img_fl, img_video), axis=2)
                frame = cv2.resize(frame, (256, 256))  # 256 x 256 x 6
                frames.append(frame)
        finally:
            # always free the decoder handle, even on error or exit
            video.release()

        frames = np.stack(frames, axis=0).astype(
            np.float32)/255.0  # N x 256 x 256 x 6

        # input: landmarks of frame k + video image of frame k+1
        image_in = np.concatenate(
            [frames[0:-1, :, :, 0:3], frames[1:, :, :, 3:6]], axis=3)
        # target: video image of frame k
        image_out = frames[0:-1, :, :, 3:6]

        image_in, image_out = np.swapaxes(
            image_in, 1, 3), np.swapaxes(image_out, 1, 3)

        return image_in, image_out
Exemplo n.º 2
0
    def __getitem__(self, item):
        """Return an (input, target) pair covering every frame of one video.

        The landmark file ``self.fls_filenames[item]`` is loaded with
        ``np.loadtxt``; column 0 of each row is used as a frame index and the
        remaining columns are reshaped to ``(68, 3)`` landmarks.  Only the
        landmark columns are smoothed with a Savitzky-Golay filter
        (window 11, order 3) so that the frame index stays exact.

        A reference face is taken as the first frame of the video belonging
        to ``self.fls_filenames[max(item - 1, 0)]``.  Rows are sub-sampled
        with ``self.fps_scale``; for each sampled row a 256 x 256 landmark
        image from ``vis_landmark_on_img``, the reference face and the video
        frame are concatenated channel-wise.

        Returns:
            tuple: ``(image_in, image_out)`` as float32 arrays scaled by
            1/255, with axes 1 and 3 swapped (channels first).
            ``image_in`` has 6 channels (landmark image + reference face);
            ``image_out`` has the 3 channels of the video frame.

        Raises:
            ValueError: if the reference face or a video frame cannot be
                read, or if there are no frames to stack.
        """
        fls_filename = self.fls_filenames[item]

        # load landmark file
        fls = np.loadtxt(os.path.join(self.src_dir, fls_filename))
        from scipy.signal import savgol_filter
        # Smooth the landmark columns only: filtering column 0 would turn the
        # integer frame index into a float that may truncate one too low.
        fls[:, 1:] = savgol_filter(fls[:, 1:], 11, 3, axis=0)

        # load reference face (first frame of the previous item's video)
        random_fls_filename = self.fls_filenames[max(item-1, 0)]
        random_video_dir = os.path.join(
            self.mp4_dir, random_fls_filename[10:-7] + '.mp4')
        random_video = cv2.VideoCapture(random_video_dir)
        try:
            if not random_video.isOpened():
                # NOTE(review): exit(0) ends the whole process with a success
                # status; consider raising an exception instead.
                print('Unable to open video file')
                exit(0)
            got_face, random_face = random_video.read()
        finally:
            # only one frame is needed, so free the handle right away
            random_video.release()
        if not got_face:
            raise ValueError(
                'Unable to read first frame from %s' % random_video_dir)

        # # ================= preprocessed VOX version ================================
        video_dir = os.path.join(self.mp4_dir, fls_filename[10:-7]+'.mp4')
        # ======================================================================

        video = cv2.VideoCapture(video_dir)
        try:
            if not video.isOpened():
                print('Unable to open video file')
                exit(0)

            # skip first several frames due to landmark extraction;
            # grab() advances the stream without returning the image
            start_idx = int(fls[0, 0] // self.fps_scale)
            for _ in range(start_idx):
                video.grab()

            # save video and landmark in parallel
            frames = []
            for j in range(int(fls.shape[0]//self.fps_scale)):
                ret, img_video = video.read()
                if not ret:
                    raise ValueError(
                        'Unable to read frame %d from %s' % (j, video_dir))

                img_fl = np.ones(shape=(256, 256, 3)) * 255
                fl = fls[int(j*self.fps_scale), 1:].astype(int)
                img_fl = vis_landmark_on_img(img_fl, np.reshape(fl, (68, 3)))

                frame = np.concatenate(
                    (img_fl, random_face, img_video), axis=2)
                frames.append(frame)
        finally:
            # always free the decoder handle, even on error or exit
            video.release()

        frames = np.stack(frames, axis=0).astype(
            np.float32)/255.0  # N x 256 x 256 x 9

        image_in = frames[:, :, :, 0:6]   # landmark image + reference face
        image_out = frames[:, :, :, 6:9]  # video frame

        image_in, image_out = np.swapaxes(
            image_in, 1, 3), np.swapaxes(image_out, 1, 3)
        return image_in, image_out
Exemplo n.º 3
0
    def __getitem__(self, item):
        """Return an (input, target) pair built from randomly sampled frames.

        The landmark file ``self.fls_filenames[item]`` is loaded with
        ``np.loadtxt``; column 0 of each row is used as a frame index and the
        remaining columns are reshaped to ``(68, 3)`` landmarks.  Rows are
        sub-sampled with ``self.fps_scale``.

        Up to ``self.num_random_frames`` sample indices are drawn without
        replacement from ``range(int(fls.shape[0] // fps_scale) - 2)`` and
        visited in ascending order.  For each one a 256 x 256 landmark image
        from ``vis_landmark_on_img`` is concatenated channel-wise with the
        video frame.

        Returns:
            tuple: ``(image_in, image_out)`` as float32 arrays scaled by
            1/255, with axes 1 and 3 swapped (channels first).
            ``image_in`` holds, for every sampled frame but the last, its
            landmark image (3 channels) followed by the video frame of the
            *next* sampled frame (3 channels).  ``image_out`` holds the video
            frame of the same sampled frame (3 channels).

        Raises:
            ValueError: if a sampled frame cannot be read from the video, or
                if no frames were sampled (``np.stack`` on an empty list).
        """
        fls_filename = self.fls_filenames[item]

        # load landmark file
        fls = np.loadtxt(os.path.join(self.src_dir, fls_filename))

        # # ================= preprocessed VOX version ================================
        video_dir = os.path.join(self.mp4_dir, fls_filename[10:-7]+'.mp4')
        # ======================================================================

        video = cv2.VideoCapture(video_dir)
        try:
            if not video.isOpened():
                # NOTE(review): exit(0) ends the whole process with a success
                # status; consider raising an exception instead.
                print('Unable to open video file')
                exit(0)

            # skip first several frames due to landmark extraction;
            # grab() advances the stream without returning the image
            start_idx = int(fls[0, 0] // self.fps_scale)
            for _ in range(start_idx):
                video.grab()

            random_frame_indices = np.random.permutation(
                int(fls.shape[0]//self.fps_scale)-2)[0:self.num_random_frames]
            # a set gives O(1) membership tests inside the frame loop
            selected = set(random_frame_indices.tolist())
            # nothing after the last sampled index is needed
            num_to_scan = max(selected) + 1 if selected else 0

            # save video and landmark in parallel
            frames = []
            for j in range(num_to_scan):
                if j not in selected:
                    video.grab()
                    continue

                ret, img_video = video.read()
                if not ret:
                    raise ValueError(
                        'Unable to read frame %d from %s' % (j, video_dir))

                img_fl = np.ones(shape=(256, 256, 3)) * 255
                fl = fls[int(j*self.fps_scale), 1:].astype(int)
                img_fl = vis_landmark_on_img(img_fl, np.reshape(fl, (68, 3)))

                frame = np.concatenate((img_fl, img_video), axis=2)
                frames.append(frame)
        finally:
            # always free the decoder handle, even on error or exit
            video.release()

        frames = np.stack(frames, axis=0).astype(
            np.float32)/255.0  # N x 256 x 256 x 6

        # input: landmarks of frame k + video image of frame k+1
        image_in = np.concatenate(
            [frames[0:-1, :, :, 0:3], frames[1:, :, :, 3:6]], axis=3)
        # target: video image of frame k
        image_out = frames[0:-1, :, :, 3:6]

        image_in, image_out = np.swapaxes(
            image_in, 1, 3), np.swapaxes(image_out, 1, 3)
        return image_in, image_out
Exemplo n.º 4
0
    def __getitem__(self, item):
        """Return an (input, target) pair covering the frames of one raw video.

        The landmark file ``self.fls_filenames[item]`` is loaded with
        ``np.loadtxt``; column 0 of each row is used as a frame index and the
        remaining columns are reshaped to ``(68, 3)`` landmarks.  Only the
        landmark columns are smoothed with a Savitzky-Golay filter
        (window 11, order 3) so that the frame index stays exact.

        A reference face is taken as the first frame of the video belonging
        to ``self.fls_filenames[max(item - 1, 0)]``.  For each of the first
        ``fls.shape[0] - 2`` rows a 224 x 224 landmark image from
        ``vis_landmark_on_img``, the reference face and the video frame are
        concatenated channel-wise and resized to 256 x 256.

        Returns:
            tuple: ``(image_in, image_out)`` as float32 arrays scaled by
            1/255, with axes 1 and 3 swapped (channels first).
            ``image_in`` has 6 channels (landmark image + reference face);
            ``image_out`` has the 3 channels of the video frame.

        Raises:
            ValueError: if the reference face or a video frame cannot be
                read, or if there are no frames to stack.
        """
        def _raw_vox_path(name):
            # Drop the last 4 characters, split on '_x_', and build
            # <mp4_dir>/<id>/<vname>/<vid>.mp4 (raw VOX layout).
            parts = name[:-4].split('_x_')
            mp4_id = parts[0].split('_')[-1]
            mp4_vname = parts[1]
            mp4_vid = parts[2][:-3]
            return os.path.join(self.mp4_dir, mp4_id,
                                mp4_vname, mp4_vid + '.mp4')

        fls_filename = self.fls_filenames[item]

        # load landmark file
        fls = np.loadtxt(os.path.join(self.src_dir, fls_filename))
        from scipy.signal import savgol_filter
        # Smooth the landmark columns only: filtering column 0 would turn the
        # integer frame index into a float that may truncate one too low.
        fls[:, 1:] = savgol_filter(fls[:, 1:], 11, 3, axis=0)

        # load reference face (first frame of the previous item's video)
        random_fls_filename = self.fls_filenames[max(item - 1, 0)]
        random_video_dir = _raw_vox_path(random_fls_filename)
        print('============================\nvideo_dir : ' + random_video_dir, item)
        random_video = cv2.VideoCapture(random_video_dir)
        try:
            if not random_video.isOpened():
                # NOTE(review): exit(0) ends the whole process with a success
                # status; consider raising an exception instead.
                print('Unable to open video file')
                exit(0)
            got_face, random_face = random_video.read()
        finally:
            # only one frame is needed, so free the handle right away
            random_video.release()
        if not got_face:
            raise ValueError(
                'Unable to read first frame from %s' % random_video_dir)

        # load mp4 file
        # ================= raw VOX version ================================
        video_dir = _raw_vox_path(fls_filename)
        # ======================================================================

        video = cv2.VideoCapture(video_dir)
        try:
            if not video.isOpened():
                print('Unable to open video file')
                exit(0)

            # skip first several frames due to landmark extraction;
            # grab() advances the stream without returning the image
            start_idx = int(fls[0, 0])
            for _ in range(start_idx):
                video.grab()

            # save video and landmark in parallel
            frames = []
            for j in range(int(fls.shape[0])-2):
                ret, img_video = video.read()
                if not ret:
                    raise ValueError(
                        'Unable to read frame %d from %s' % (j, video_dir))

                img_fl = np.ones(shape=(224, 224, 3)) * 255
                fl = fls[j, 1:].astype(int)
                img_fl = vis_landmark_on_img(img_fl, np.reshape(fl, (68, 3)))

                frame = np.concatenate(
                    (img_fl, random_face, img_video), axis=2)
                frame = cv2.resize(frame, (256, 256))  # 256 x 256 x 9
                frames.append(frame)
        finally:
            # always free the decoder handle, even on error or exit
            video.release()

        frames = np.stack(frames, axis=0).astype(
            np.float32)/255.0  # N x 256 x 256 x 9
        image_in = frames[:, :, :, 0:6]   # landmark image + reference face
        image_out = frames[:, :, :, 6:9]  # video frame

        image_in, image_out = np.swapaxes(
            image_in, 1, 3), np.swapaxes(image_out, 1, 3)
        return image_in, image_out