def mock_encoded_video_dataset_file():
    """
    Creates a temporary mock encoded video dataset consisting of 2 distinct
    videos, each listed twice in the label file, producing 4 (label, video)
    entries labeled 0 - 3.

    Yields a tuple of:
        - the path of a labeled video file pointing to this mock dataset,
        - the ordered (label, video tensor) tuples,
        - the video duration in seconds.

    NOTE(review): this is a generator intended to be used as a context manager
    (e.g. via ``contextlib.contextmanager``) so the temporary videos stay alive
    while the caller uses them.
    """
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=num_frames, fps=fps) as (
        video_file_name_1,
        data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_2,
            data_2,
        ):
            # delete=False: the file must outlive this `with` so consumers can
            # read it by path after the handle is closed (and flushed).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name_1} 0\n".encode())
                f.write(f"{video_file_name_2} 1\n".encode())
                f.write(f"{video_file_name_1} 2\n".encode())
                f.write(f"{video_file_name_2} 3\n".encode())

            # Yield after the label file is closed so all writes are flushed,
            # but still inside the video contexts so the videos exist.
            label_videos = [
                (0, data_1),
                (1, data_2),
                (2, data_1),
                (3, data_2),
            ]
            video_duration = num_frames / fps
            yield f.name, label_videos, video_duration
def test_video_name_with_whitespace_works(self, decoder):
    """
    A video whose file name contains whitespace is parsed from the label
    file and decoded correctly by LabeledVideoDataset.
    """
    frame_count = 10
    frame_rate = 5
    with temp_encoded_video(
        num_frames=frame_count, fps=frame_rate, prefix="pre fix"
    ) as (video_file_name, data):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
            f.write(f"{video_file_name} 0\n".encode())
            f.write(f"{video_file_name} 1\n".encode())

        # One uniform clip spanning the entire video duration.
        total_duration = frame_count / frame_rate
        dataset = LabeledVideoDataset(
            LabeledVideoPaths.from_path(f.name),
            clip_sampler=make_clip_sampler("uniform", total_duration),
            video_sampler=SequentialSampler,
            decode_audio=False,
            decoder=decoder,
        )

        # Both label-file entries point at the same video data.
        expected = [(0, data), (1, data)]
        for i, sample in enumerate(dataset):
            self.assertTrue(sample["video"].equal(expected[i][1]))
            self.assertEqual(sample["label"], expected[i][0])
def test_video_works(self):
    """
    Decoding an encoded video returns the expected frames for a full clip
    and a half clip, and returns no frames for an out-of-range clip.
    """
    frame_total = 11
    frame_rate = 5
    with temp_encoded_video(num_frames=frame_total, fps=frame_rate) as (
        file_name,
        data,
    ):
        video = EncodedVideo.from_path(file_name)
        self.assertAlmostEqual(video.duration, frame_total / frame_rate)

        # All frames (0 - duration seconds).
        clip = video.get_clip(0, video.duration)
        self.assertTrue(clip["video"].equal(data))
        self.assertEqual(clip["audio"], None)

        # First half of the frames.
        clip = video.get_clip(0, video.duration / 2)
        self.assertTrue(clip["video"].equal(data[:, : round(frame_total / 2)]))
        self.assertEqual(clip["audio"], None)

        # A window entirely past the end of the video yields no frames.
        clip = video.get_clip(video.duration + 1, video.duration + 3)
        self.assertEqual(clip["video"], None)
        self.assertEqual(clip["audio"], None)

        video.close()
def test_sampling_with_non_divisible_processes_by_clips(self, decoder):
    """
    Samples 5 total clips (3 + 2) across 2 DataLoader workers, so clips do
    not divide evenly among processes; all clips must still be returned
    exactly once (in any order).
    """
    # Make one video with 15 frames and one with 10 frames, producing 3 clips and 2
    # clips respectively.
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
        video_file_name_1,
        data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_2,
            data_2,
        ):
            # delete=False so the label file persists (readable by path)
            # after this handle closes.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name_1} 0\n".encode())
                f.write(f"{video_file_name_2} 1\n".encode())

            total_duration = num_frames / fps
            # Subtract _EPS so each clip decodes exactly half the frames.
            half_duration = total_duration / 2 - self._EPS
            clip_sampler = make_clip_sampler("uniform", half_duration)
            labeled_video_paths = LabeledVideoPaths.from_path(f.name)
            dataset = LabeledVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            half_frames = num_frames // 2
            # Worker order is nondeterministic, so compare as a set.
            expected = {
                (0, data_1[:, half_frames * 2:]),  # 3/3 clip (last third)
                (0, data_1[:, half_frames:half_frames * 2]),  # 2/3 clip
                (0, data_1[:, :half_frames]),  # 1/3 clip
                (1, data_2[:, :half_frames]),  # First half
                (1, data_2[:, half_frames:]),  # Second half
            }

            test_dataloader = DataLoader(dataset, batch_size=None, num_workers=2)
            actual = [
                (sample["label"], sample["video"]) for sample in test_dataloader
            ]
            assert_unordered_list_compare_true(self, expected, actual)
def test_constant_clips_per_video_sampling_works(self, decoder):
    """
    With the "constant_clips_per_video" sampler, each video contributes a
    fixed number (2) of evenly spaced clips, returned sequentially.
    """
    # First video has 15 frames, the second 10 frames.
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
        first_path,
        first_data,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            second_path,
            second_data,
        ):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{first_path} 0\n".encode())
                f.write(f"{second_path} 1\n".encode())

            # Clip duration slightly under clip_frames / fps so exactly
            # clip_frames frames are decoded per clip.
            clip_frames = 2
            clip_sampler = make_clip_sampler(
                "constant_clips_per_video", clip_frames / fps - self._EPS, 2
            )
            dataset = LabeledVideoDataset(
                LabeledVideoPaths.from_path(f.name),
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Dataset has 2 videos, each yielding two evenly spaced clips of
            # clip_frames frames: the first always starts at second 0, the
            # second at the frame nearest (total_duration - clip_duration) / 2.
            mid_1 = math.ceil((first_data.shape[1] - clip_frames) / 2)
            mid_2 = math.ceil((second_data.shape[1] - clip_frames) / 2)
            expected = [
                (0, first_data[:, :clip_frames]),
                (0, first_data[:, mid_1:mid_1 + clip_frames]),
                (1, second_data[:, :clip_frames]),
                (1, second_data[:, mid_2:mid_2 + clip_frames]),
            ]
            for i, sample in enumerate(dataset):
                self.assertTrue(sample["video"].equal(expected[i][1]))
                self.assertEqual(sample["label"], expected[i][0])
def test_file_api(self):
    """
    EncodedVideoPyAV can be constructed from an open binary file object
    rather than a path, and decodes the full video correctly.
    """
    frame_total = 11
    frame_rate = 5
    with temp_encoded_video(num_frames=frame_total, fps=frame_rate) as (
        file_name,
        data,
    ):
        with open(file_name, "rb") as f:
            video = EncodedVideoPyAV(f)
            self.assertAlmostEqual(video.duration, frame_total / frame_rate)

            # The full clip should decode to exactly the encoded frames,
            # with no audio track present.
            clip = video.get_clip(0, video.duration)
            self.assertTrue(clip["video"].equal(data))
            self.assertEqual(clip["audio"], None)
def test_sampling_with_distributed_sampler(self, decoder):
    """
    Samples 5 total clips (3 + 2) across 2 distributed ranks and verifies
    that, after merging per-rank results, every clip is returned exactly
    once per epoch (in any order), for two consecutive epochs.
    """
    # Make one video with 15 frames and one with 10 frames, producing 3 clips and 2
    # clips respectively.
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
        video_file_name_1,
        data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_2,
            data_2,
        ):
            # delete=False so the label file remains readable by path in the
            # spawned worker processes.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name_1} 0\n".encode())
                f.write(f"{video_file_name_2} 1\n".encode())

            total_duration = num_frames / fps
            # Subtract _EPS so each clip decodes exactly half the frames.
            half_duration = total_duration / 2 - self._EPS

            # Create several processes initialized in a PyTorch distributed process
            # group so that distributed sampler is setup correctly when dataset is
            # constructed.
            num_processes = 2
            processes = []
            # Manager dict lets each rank report its per-epoch samples back
            # to this parent process.
            return_dict = multiprocessing.Manager().dict()
            for rank in range(num_processes):
                p = Process(
                    target=run_distributed,
                    args=(
                        rank,
                        num_processes,
                        decoder,
                        half_duration,
                        f.name,
                        return_dict,
                    ),
                )
                p.start()
                processes.append(p)
            for p in processes:
                p.join()

            # After joining all distributed processes we expect all these label,
            # video pairs to be returned in random order.
            half_frames = num_frames // 2
            expected = {
                (0, data_1[:, :half_frames]),  # 1/3 clip
                (0, data_1[:, half_frames:half_frames * 2]),  # 2/3 clip
                (0, data_1[:, half_frames * 2:]),  # 3/3 clip
                (1, data_2[:, :half_frames]),  # First half
                (1, data_2[:, half_frames:]),  # Second half
            }

            # Merge the per-rank results for each epoch key (e.g. "epoch_1").
            epoch_results = collections.defaultdict(list)
            for v in return_dict.values():
                for k_2, v_2 in v.items():
                    epoch_results[k_2].extend(v_2)

            assert_unordered_list_compare_true(
                self, expected, epoch_results["epoch_1"]
            )
            assert_unordered_list_compare_true(
                self, expected, epoch_results["epoch_2"]
            )