def __getitem__(self, index):
    # Get sequence paths.
    seq_idx = self.update_frame_index(self.nmfc_video_paths, index)
    nmfc_video_paths = self.nmfc_video_paths[seq_idx]
    nmfc_len = len(nmfc_video_paths)
    rgb_video_paths = self.rgb_video_paths[seq_idx]
    if not self.opt.no_eye_gaze or (not self.opt.no_mouth_D and self.opt.isTrain) or \
            (self.opt.use_eyes_D and self.opt.isTrain):
        landmark_video_paths = self.landmark_video_paths[seq_idx]

    # Get parameters and transforms.
    n_frames_total, start_idx = get_video_parameters(self.opt, self.n_frames_total, nmfc_len, self.frame_idx)
    first_nmfc_image = Image.open(nmfc_video_paths[0]).convert('RGB')
    params = get_params(self.opt, first_nmfc_image.size)
    # Do not normalize NMFC images, but augment them during training.
    transform_scale_nmfc_video = get_transform(self.opt, params, normalize=False,
                                               augment=not self.opt.no_augment_input and self.opt.isTrain)
    # Eye-gaze images reuse the same (un-normalized) transform.
    transform_scale_eye_gaze_video = transform_scale_nmfc_video
    transform_scale_rgb_video = get_transform(self.opt, params)
    change_seq = False if self.opt.isTrain else self.change_seq

    # Read data.
    A_paths = []
    rgb_video = nmfc_video = eye_video = mouth_centers = eyes_centers = 0
    for i in range(n_frames_total):
        # NMFC
        nmfc_video_path = nmfc_video_paths[start_idx + i]
        nmfc_video_i = self.get_image(nmfc_video_path, transform_scale_nmfc_video)
        nmfc_video = nmfc_video_i if i == 0 else torch.cat([nmfc_video, nmfc_video_i], dim=0)
        # RGB
        rgb_video_path = rgb_video_paths[start_idx + i]
        rgb_video_i = self.get_image(rgb_video_path, transform_scale_rgb_video)
        rgb_video = rgb_video_i if i == 0 else torch.cat([rgb_video, rgb_video_i], dim=0)
        A_paths.append(nmfc_video_path)
        # Eye gaze
        if not self.opt.no_eye_gaze:
            landmark_video_path = landmark_video_paths[start_idx + i]
            eye_video_i = create_eyes_image(landmark_video_path, first_nmfc_image.size,
                                            transform_scale_eye_gaze_video,
                                            add_noise=self.opt.isTrain)
            eye_video = eye_video_i if i == 0 else torch.cat([eye_video, eye_video_i], dim=0)
        # Mouth centers (for the mouth discriminator, training only).
        if not self.opt.no_mouth_D and self.opt.isTrain:
            landmark_video_path = landmark_video_paths[start_idx + i]
            mouth_centers_i = self.get_mouth_center(landmark_video_path)
            mouth_centers = mouth_centers_i if i == 0 else torch.cat([mouth_centers, mouth_centers_i], dim=0)
        # Eye centers (for the eyes discriminator, training only).
        if self.opt.use_eyes_D and self.opt.isTrain:
            landmark_video_path = landmark_video_paths[start_idx + i]
            eyes_centers_i = self.get_eyes_center(landmark_video_path)
            eyes_centers = eyes_centers_i if i == 0 else torch.cat([eyes_centers, eyes_centers_i], dim=0)

    return_list = {'nmfc_video': nmfc_video, 'rgb_video': rgb_video, 'eye_video': eye_video,
                   'mouth_centers': mouth_centers, 'eyes_centers': eyes_centers,
                   'change_seq': change_seq, 'A_paths': A_paths}
    return return_list
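# A minimal sketch (not part of the original file) of the channel-stacking
# convention __getitem__ relies on: frames are concatenated along dim 0, so a
# clip of T RGB frames becomes a (T * 3, H, W) tensor, which is later viewed
# as (1, T, C, H, W) before the generator's forward pass. All shapes below
# are assumed for illustration.
import torch

T, C, H, W = 3, 3, 256, 256
frames = [torch.randn(C, H, W) for _ in range(T)]
stacked = torch.cat(frames, dim=0)      # (T * C, H, W), as built in the loop above
batched = stacked.view(1, -1, C, H, W)  # (1, T, C, H, W), as consumed at inference
assert batched.shape == (1, T, C, H, W)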
def compute_fake_video(input_queue, output_queue, modelG, opt):
    input_A_all = None
    while True:
        # Read input.
        conditional_input = input_queue.get()
        nmfc, eye_landmarks, real_frame = conditional_input
        # NumPy images are (H, W, C), so unpack the height first.
        # (The original unpacked these as width, height, which only works
        # for square frames.)
        height, width = nmfc.shape[0:2]
        # Create tensors.
        params = get_params(opt, (width, height))
        transform_scale_nmfc_video = get_transform(opt, params, normalize=False, augment=False)
        nmfc = transform_scale_nmfc_video(Image.fromarray(nmfc))
        transform_scale_eye_gaze_video = get_transform(opt, params, normalize=False)
        eye_gaze = create_eyes_image(None, (width, height), transform_scale_eye_gaze_video,
                                     add_noise=False, pts=eye_landmarks)
        # Concatenate conditional inputs.
        input_A = torch.cat([nmfc, eye_gaze], dim=0)
        if input_A_all is None:
            # If no previously generated frames are available, pad with zeros.
            input_A_all = torch.cat([torch.zeros((opt.n_frames_G - 1) * opt.input_nc, height, width),
                                     input_A], dim=0)
        else:
            # Discard the oldest conditional input and append the new one.
            input_A_all = torch.cat([input_A_all[opt.input_nc:, :, :], input_A], dim=0)
        input_A_final = input_A_all.view(1, -1, opt.input_nc, height, width)
        # Forward pass through the Generator.
        generated = modelG.inference(input_A_final, None)
        fake_frame = util.tensor2im(generated[0].data[0])
        # Write results to the output queue.
        output_queue.put((fake_frame, real_frame))
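# A hedged wiring sketch (illustrative, not from the original file) of how
# compute_fake_video might be driven as a background worker: a capture loop
# feeds (nmfc, eye_landmarks, real_frame) triples into input_queue, and the
# display loop drains output_queue. `modelG`, `opt`, and the per-frame inputs
# are assumed to be constructed elsewhere, so the sketch is left commented.
#
# import queue
# import threading
#
# input_queue = queue.Queue(maxsize=1)  # bound the queue to keep latency low
# output_queue = queue.Queue()
# worker = threading.Thread(target=compute_fake_video,
#                           args=(input_queue, output_queue, modelG, opt),
#                           daemon=True)
# worker.start()
# input_queue.put((nmfc_image, eye_landmarks, real_frame))  # producer side
# fake_frame, real_frame = output_queue.get()               # consumer side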