def find_best_frame_func(self, source, driving):
    """Pick the driving-video frame whose face pose best matches the source.

    Detects 2D facial landmarks in the source image and every driving
    frame, normalizes them for position/scale, and returns the index of
    the driving frame with the smallest squared landmark distance.

    Args:
        source: source image, float array in [0, 1] (H, W, C) —
            multiplied by 255 before detection, so [0, 1] range is
            assumed; TODO confirm against callers.
        driving: iterable of driving frames in the same format.

    Returns:
        int: index of the best-matching driving frame (0 if no driving
        frame contains a detectable face).

    Raises:
        TypeError/IndexError: if no face is detected in `source`.
    """
    import face_alignment

    def normalize_kp(kp):
        # Center the landmarks and scale by the sqrt of their convex-hull
        # area so the comparison is invariant to face position and size.
        kp = kp - kp.mean(axis=0, keepdims=True)
        area = ConvexHull(kp[:, :2]).volume
        area = np.sqrt(area)
        kp[:, :2] = kp[:, :2] / area
        return kp

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=True)
    # Detector expects pixel values in [0, 255].
    kp_source = fa.get_landmarks(255 * source)[0]
    kp_source = normalize_kp(kp_source)
    norm = float('inf')
    frame_num = 0
    for i, image in tqdm(enumerate(driving)):
        landmarks = fa.get_landmarks(255 * image)
        if landmarks is None:
            # No face detected in this driving frame: skip it rather than
            # crashing on a None subscript.
            continue
        kp_driving = normalize_kp(landmarks[0])
        new_norm = (np.abs(kp_source - kp_driving)**2).sum()
        if new_norm < norm:
            norm = new_norm
            frame_num = i
    return frame_num
def make_animation(self,
                   source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True):
    """Animate the source image frame by frame with the driving video.

    Args:
        source_image: source image as an HWC float array.
        driving_video: sequence of driving frames (each HWC).
        generator: network producing an output frame from the source and
            a pair of keypoint sets.
        kp_detector: network extracting keypoints from an image tensor.
        relative: use motion relative to the first driving frame for both
            keypoint values and jacobians.
        adapt_movement_scale: rescale motion to the source face size.

    Returns:
        list of generated frames, each an HWC numpy array.
    """
    with paddle.no_grad():
        # HWC image -> NCHW tensor; frame stack -> NCTHW tensor.
        src_np = source_image[np.newaxis].astype(np.float32)
        source = paddle.to_tensor(src_np).transpose([0, 3, 1, 2])
        drv_np = np.array(driving_video)[np.newaxis].astype(np.float32)
        driving = paddle.to_tensor(drv_np).transpose([0, 4, 1, 2, 3])

        kp_source = kp_detector(source)
        # Keypoints of the first driving frame anchor relative motion.
        kp_driving_initial = kp_detector(driving[:, :, 0])

        frames = []
        for idx in tqdm(range(driving.shape[2])):
            current = driving[:, :, idx]
            kp_current = kp_detector(current)
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_current,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=relative,
                use_relative_jacobian=relative,
                adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            frames.append(
                np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
        return frames
def make_animation(self,
                   source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True):
    """Batched animation of the source image driven by the driving video.

    Processes `self.batch_size` driving frames per generator call and
    returns pixel values scaled to [0, 255], optionally passed through
    the face enhancer.

    NOTE(review): this overload shares its name with the per-frame
    variant — if both live in the same class, the later definition
    shadows the earlier one; confirm intent.

    Args:
        source_image: source image as an HWC float array.
        driving_video: sequence of driving frames (each HWC).
        generator: network producing output frames from source/keypoints.
        kp_detector: network extracting keypoints from an image batch.
        relative: use motion relative to the first driving frame.
        adapt_movement_scale: rescale motion to the source face size.

    Returns:
        numpy array of all generated frames, NHWC, values * 255.0.
    """
    with paddle.no_grad():
        source = paddle.to_tensor(
            source_image[np.newaxis].astype(np.float32)).transpose(
                [0, 3, 1, 2])
        # Whole video as one NCHW batch of frames (no time axis here).
        driving = paddle.to_tensor(
            np.array(driving_video).astype(np.float32)).transpose(
                [0, 3, 1, 2])

        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[0:1])

        # Tile the source keypoints and image up to the batch size once,
        # then slice per batch below.
        tiled_kp = {
            "value":
            paddle.tile(kp_source["value"],
                        repeat_times=[self.batch_size, 1, 1]),
            "jacobian":
            paddle.tile(kp_source["jacobian"],
                        repeat_times=[self.batch_size, 1, 1, 1]),
        }
        source = paddle.tile(source,
                             repeat_times=[self.batch_size, 1, 1, 1])

        total = driving.shape[0]
        n_batches = int(np.ceil(float(total) / self.batch_size))
        outputs = []
        start = 0
        for _ in tqdm(range(n_batches)):
            # Last batch may be smaller than batch_size.
            count = min(self.batch_size, total - start)
            batch = driving[start:start + count]
            kp_driving = kp_detector(batch)
            kp_src_slice = {
                "value": tiled_kp["value"][0:count],
                "jacobian": tiled_kp["jacobian"][0:count],
            }
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=relative,
                use_relative_jacobian=relative,
                adapt_movement_scale=adapt_movement_scale)
            out = generator(source[0:count],
                            kp_source=kp_src_slice,
                            kp_driving=kp_norm)
            img = np.transpose(out['prediction'].numpy(),
                               [0, 2, 3, 1]) * 255.0
            if self.face_enhancement:
                img = self.faceenhancer.enhance_from_batch(img)
            outputs.append(img)
            start += count
        return np.concatenate(outputs)