def run_distributed(rank, size, decoder, clip_duration, data_name, return_dict):
    """
    Entry point executed by each process of the distributed test.

    Joins a local "gloo" process group, builds a ``LabeledVideoDataset`` over
    ``data_name`` whose videos are split by ``DistributedSampler``, iterates it
    for two epochs and stores the ``(label, video)`` pairs of both epochs under
    ``return_dict[rank]``.
    """
    os.environ.update({"MASTER_ADDR": "127.0.0.1", "MASTER_PORT": "29500"})
    dist.init_process_group("gloo", rank=rank, world_size=size)

    sampler = make_clip_sampler("uniform", clip_duration)
    video_paths = LabeledVideoPaths.from_path(data_name)
    dataset = LabeledVideoDataset(
        video_paths,
        clip_sampler=sampler,
        video_sampler=DistributedSampler,
        decode_audio=False,
        decoder=decoder,
    )
    loader = DataLoader(dataset, batch_size=None, num_workers=1)

    # Two passes over the loader, the way a training loop would run epochs.
    epochs = {}
    for epoch_index in (0, 1):
        dataset.video_sampler.set_epoch(epoch_index)
        epochs[f"epoch_{epoch_index + 1}"] = [
            (item["label"], item["video"]) for item in loader
        ]

    return_dict[rank] = epochs
def __init__(
    self,
    running_stage: RunningStage,
    data: Any,
    *args,
    clip_sampler: str = "random",
    clip_duration: float = 2,
    video_sampler: Type[Sampler] = torch.utils.data.RandomSampler,
    decode_audio=False,
    decoder: str = "pyav",
    clip_sampler_kwargs: Optional[Dict] = None,
    data_folder: str = "",
    **kwargs,
):
    """
    Store the video sampling configuration, then initialise the parent class.

    Args:
        running_stage: Forwarded to the parent initialiser.
        data: Forwarded to the parent initialiser.
        *args: Extra positional arguments for the parent initialiser.
        clip_sampler: Sampler name handed to ``make_clip_sampler``.
        clip_duration: Clip duration handed to ``make_clip_sampler``.
        video_sampler: Sampler class kept on the instance; a falsy value
            falls back to ``torch.utils.data.RandomSampler``.
        decode_audio: Kept on the instance as ``decode_audio``.
        decoder: Kept on the instance as ``decoder``.
        clip_sampler_kwargs: Extra keyword arguments for
            ``make_clip_sampler``; a falsy value means none.
        data_folder: Kept on the instance as ``_data_folder``.
        **kwargs: Extra keyword arguments for the parent initialiser.

    Raises:
        ModuleNotFoundError: If pytorchvideo is not available.
    """
    if not _PYTORCHVIDEO_AVAILABLE:
        raise ModuleNotFoundError("Please, run `pip install pytorchvideo`.")

    self.video_sampler = (
        video_sampler if video_sampler else torch.utils.data.RandomSampler
    )

    extra_sampler_kwargs = clip_sampler_kwargs if clip_sampler_kwargs else {}
    self.clip_sampler = make_clip_sampler(
        clip_sampler,
        clip_duration,
        **extra_sampler_kwargs,
    )

    self.decode_audio = decode_audio
    self.decoder = decoder
    self.clip_duration = clip_duration
    self._data_folder = data_folder

    super().__init__(running_stage, data, *args, **kwargs)
def test_multiple_labels_per_frame(self):
    """
    Frames annotated with several comma-separated labels yield a label list.

    Writes a frame list csv for one three-frame video where every row carries
    the label string ``"0,100"``, then checks that the first ``Charades``
    sample reports ``[0, 100]`` for each frame and returns the frame data
    unchanged.
    """
    frame_names = [f"{index}.png" for index in range(3)]
    video_id = "1"
    labels = "0,100"

    # Create the csv describing the single test frame video.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as csv_file:
        csv_file.write("original_vido_id video_id frame_id path labels\n".encode())
        with temp_frame_video(frame_names) as (video_dir, video_data):
            for frame_id, frame_name in enumerate(frame_names):
                frame_path = pathlib.Path(video_dir) / frame_name
                row = f"{video_dir} {video_id} {frame_id} {frame_path} {labels}\n"
                csv_file.write(row.encode())

            # Close explicitly so the rows are on disk before the dataset reads them.
            csv_file.close()

            # Total duration of 3 frames at 30fps is 0.1 seconds.
            sampler = make_clip_sampler("random", 0.1)
            dataset = Charades(
                csv_file.name,
                clip_sampler=sampler,
                video_sampler=SequentialSampler,
            )

            first = next(dataset)
            self.assertEqual(first["label"], [[0, 100], [0, 100], [0, 100]])
            self.assertTrue(first["video"].equal(video_data))
def test_single_clip_per_video_works(self):
    """
    Each SSv2 video yields one clip made of the two expected frames.

    With ``frames_per_clip=2`` the samples are compared against frames 1 and
    4 (along axis 1) of the corresponding source video.
    """
    with temp_ssv2_dataset() as (
        label_name_file,
        video_label_file,
        video_path_file,
        video_1,
        video_2,
    ):
        # Put arbitrary duration as ssv2 always needs full video clip.
        sampler = make_clip_sampler("constant_clips_per_video", 1.0, 1)

        # Expect taking 2 frames (1-th and 4-th among 7 frames).
        dataset = SSv2(
            label_name_file,
            video_label_file,
            video_path_file,
            clip_sampler=sampler,
            video_sampler=SequentialSampler,
            frames_per_clip=2,
        )

        references = [(0, video_1), (1, video_2)]
        for sample, (want_label, source_video) in zip(dataset, references):
            self.assertEqual(sample["label"], want_label)
            want_frames = source_video[:, (1, 4)]
            self.assertTrue(sample["video"].equal(want_frames))
def test_video_name_with_whitespace_works(self, decoder):
    """
    A video whose file name contains a space can be listed and decoded.

    The same encoded video (created with the prefix ``"pre fix"``) is listed
    twice, with labels 0 and 1, and each sample must return the full video
    together with the matching label.
    """
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=num_frames, fps=fps, prefix="pre fix") as (
        video_file_name,
        data,
    ):
        # The listing file is closed on leaving the ``with`` so it can be read back.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as listing:
            for label in (0, 1):
                listing.write(f"{video_file_name} {label}\n".encode())

        whole_video_duration = num_frames / fps
        sampler = make_clip_sampler("uniform", whole_video_duration)
        video_paths = LabeledVideoPaths.from_path(listing.name)
        dataset = LabeledVideoDataset(
            video_paths,
            clip_sampler=sampler,
            video_sampler=SequentialSampler,
            decode_audio=False,
            decoder=decoder,
        )

        references = [(0, data), (1, data)]
        for position, sample in enumerate(dataset):
            want_label, want_video = references[position]
            self.assertTrue(sample["video"].equal(want_video))
            self.assertEqual(sample["label"], want_label)
def test_sampling_with_more_processes_than_videos(self, decoder):
    """
    Loading with more DataLoader workers (16) than videos still yields every clip.

    Each mock video is sampled uniformly with a clip duration just under half
    of the video, so two clips per video are expected. The expected clips are
    the two halves of each video along axis 1, which is the axis the slices
    below (and the sibling tests) use for frames.
    """
    with mock_encoded_video_dataset_file() as (
        mock_csv,
        label_videos,
        total_duration,
    ):
        half_duration = total_duration / 2 - self._EPS
        clip_sampler = make_clip_sampler("uniform", half_duration)
        labeled_video_paths = LabeledVideoPaths.from_path(mock_csv)
        dataset = LabeledVideoDataset(
            labeled_video_paths,
            clip_sampler=clip_sampler,
            video_sampler=SequentialSampler,
            decode_audio=False,
            decoder=decoder,
        )

        # Split each full video into two clips.
        expected = []
        for label, data in label_videos:
            # The clips are cut along axis 1, so the frame count must come
            # from that same axis (axis 0 is a different dimension).
            num_frames = data.shape[1]
            half_frames = num_frames // 2
            expected.append((label, data[:, :half_frames]))
            expected.append((label, data[:, half_frames:]))

        test_dataloader = DataLoader(dataset, batch_size=None, num_workers=16)
        actual = [(sample["label"], sample["video"]) for sample in test_dataloader]
        assert_unordered_list_compare_true(self, expected, actual)
def _make_clip_sampler(
    clip_sampler: Union[str, "ClipSampler"] = "random",
    clip_duration: float = 2,
    clip_sampler_kwargs: Dict[str, Any] = None,
) -> "ClipSampler":
    """
    Build a clip sampler through ``make_clip_sampler``.

    Args:
        clip_sampler: First positional argument for ``make_clip_sampler``.
        clip_duration: Second positional argument for ``make_clip_sampler``.
        clip_sampler_kwargs: Extra keyword arguments to forward; ``None``
            means no extra keyword arguments.

    Returns:
        Whatever ``make_clip_sampler`` returns for these arguments.
    """
    extra_kwargs = {} if clip_sampler_kwargs is None else clip_sampler_kwargs
    return make_clip_sampler(clip_sampler, clip_duration, **extra_kwargs)
def __init__(
    self,
    train_transform: Optional[Dict[str, Callable]] = None,
    val_transform: Optional[Dict[str, Callable]] = None,
    test_transform: Optional[Dict[str, Callable]] = None,
    predict_transform: Optional[Dict[str, Callable]] = None,
    clip_sampler: Union[str, 'ClipSampler'] = "random",
    clip_duration: float = 2,
    clip_sampler_kwargs: Dict[str, Any] = None,
    video_sampler: Type[Sampler] = torch.utils.data.RandomSampler,
    decode_audio: bool = True,
    decoder: str = "pyav",
):
    """
    Record the sampling configuration and register the video data sources.

    The raw arguments are stored on the instance first (``clip_sampler`` and
    ``clip_sampler_kwargs`` exactly as passed). A clip sampler is then built
    with ``make_clip_sampler`` and used to create one
    ``VideoClassificationPathsDataSource`` for ``DefaultDataSources.FILES``
    and a separate one for ``DefaultDataSources.FOLDERS``; ``FILES`` is the
    default data source.

    Args:
        train_transform: Transform mapping passed to the parent for training.
        val_transform: Transform mapping passed to the parent for validation.
        test_transform: Transform mapping passed to the parent for testing.
        predict_transform: Transform mapping passed to the parent for predicting.
        clip_sampler: Sampler name or instance handed to ``make_clip_sampler``.
        clip_duration: Clip duration handed to ``make_clip_sampler``.
        clip_sampler_kwargs: Extra keyword arguments for ``make_clip_sampler``.
        video_sampler: Sampler class passed to each data source.
        decode_audio: Passed to each data source.
        decoder: Passed to each data source.

    Raises:
        ModuleNotFoundError: If pytorchvideo is not available.
        MisconfigurationException: If ``clip_sampler`` is falsy.
    """
    self.clip_sampler = clip_sampler
    self.clip_duration = clip_duration
    self.clip_sampler_kwargs = clip_sampler_kwargs
    self.video_sampler = video_sampler
    self.decode_audio = decode_audio
    self.decoder = decoder

    if not _PYTORCHVIDEO_AVAILABLE:
        raise ModuleNotFoundError("Please, run `pip install pytorchvideo`.")

    extra_sampler_kwargs = clip_sampler_kwargs if clip_sampler_kwargs else {}

    if not clip_sampler:
        raise MisconfigurationException(
            "clip_sampler should be provided as a string or ``pytorchvideo.data.clip_sampling.ClipSampler``"
        )

    built_sampler = make_clip_sampler(clip_sampler, clip_duration, **extra_sampler_kwargs)

    def new_paths_source():
        # Each data source key gets its own instance, configured identically.
        return VideoClassificationPathsDataSource(
            built_sampler,
            video_sampler=video_sampler,
            decode_audio=decode_audio,
            decoder=decoder,
        )

    sources = {
        DefaultDataSources.FILES: new_paths_source(),
        DefaultDataSources.FOLDERS: new_paths_source(),
    }

    super().__init__(
        train_transform=train_transform,
        val_transform=val_transform,
        test_transform=test_transform,
        predict_transform=predict_transform,
        data_sources=sources,
        default_data_source=DefaultDataSources.FILES,
    )
def test_reading_from_directory_structure(self, decoder):
    """
    Videos laid out as ``<root>/<class name>/<video>`` are read with labels.

    Two class folders ("running" and "cleaning windows") each receive one
    losslessly encoded video. The dataset is expected to return the
    "cleaning windows" video first with label 0, then the "running" video
    with label 1.
    """
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_path = pathlib.Path(root_dir)

        running_dir = root_path / "running"
        running_dir.mkdir()
        running_frames = create_dummy_video_frames(15, 10, 10)

        cleaning_dir = root_path / "cleaning windows"
        cleaning_dir.mkdir()
        cleaning_frames = create_dummy_video_frames(20, 15, 15)

        with tempfile.NamedTemporaryFile(suffix=".mp4", dir=running_dir) as running_file, \
                tempfile.NamedTemporaryFile(suffix=".mp4", dir=cleaning_dir) as cleaning_file:
            for handle in (running_file, cleaning_file):
                handle.close()

            # Write lossless video for each class.
            for handle, frames in (
                (running_file, running_frames),
                (cleaning_file, cleaning_frames),
            ):
                io.write_video(
                    handle.name,
                    frames,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

            sampler = make_clip_sampler("uniform", 3)
            video_paths = LabeledVideoPaths.from_path(root_dir)
            dataset = LabeledVideoDataset(
                video_paths,
                clip_sampler=sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Videos are sorted alphabetically so "cleaning windows" is
            # expected first (label 0), followed by "running" (label 1).
            for want_label, frames in ((0, cleaning_frames), (1, running_frames)):
                sample = next(dataset)
                self.assertEqual(sample["label"], want_label)
                want_video = thwc_to_cthw(frames).to(torch.float32)
                self.assertTrue(sample["video"].equal(want_video))
def test_video_only_frame_video_dataset(self):
    """
    ``json_dataset.video_only_dataset`` exposes two videos and yields two samples
    when built from the mock annotations with a random 2.0 second clip sampler.
    """
    clip_length = 2.0
    with mock_json_annotations() as (annotation_json, _labels, _duration):
        dataset = json_dataset.video_only_dataset(
            data_path=annotation_json,
            clip_sampler=make_clip_sampler("random", clip_length),
            decode_audio=False,
        )

        self.assertEqual(dataset.num_videos, 2)
        sample_count = sum(1 for _ in dataset)
        self.assertEqual(sample_count, 2)
def test_recognition_uniform_clip_sampler(self):
    """
    ``json_dataset.clip_recognition_dataset`` exposes four videos and yields four
    samples when built from the mock annotations with a uniform 0.05 second
    clip sampler.
    """
    clip_length = 0.05
    with mock_json_annotations() as (annotation_json, _labels, _duration):
        dataset = json_dataset.clip_recognition_dataset(
            data_path=annotation_json,
            clip_sampler=make_clip_sampler("uniform", clip_length),
            decode_audio=False,
        )

        self.assertEqual(dataset.num_videos, 4)
        sample_count = sum(1 for _ in dataset)
        self.assertEqual(sample_count, 4)
def test_single_clip_per_video_works(self):
    """
    A clip spanning the whole video returns each Charades video in one sample.

    Every sample must carry one label list per frame (three frames) and the
    complete video tensor.
    """
    with temp_charades_dataset() as (filename, video_1, video_2):
        # Total duration of 3 frames at 30fps is 0.1 seconds.
        sampler = make_clip_sampler("uniform", 0.1)
        dataset = Charades(
            filename,
            clip_sampler=sampler,
            video_sampler=SequentialSampler,
        )

        references = [
            ([[0], [0], [0]], video_1),
            ([[1], [1], [1]], video_2),
        ]
        for sample, (want_labels, want_video) in zip(dataset, references):
            self.assertEqual(sample["label"], want_labels)
            self.assertTrue(sample["video"].equal(want_video))
def test_random_video_sampler(self, decoder):
    """
    With ``RandomSampler`` over videos, each of two full passes through the
    dataset is compared, order-independently, against the expected samples.
    """
    with mock_encoded_video_dataset_file() as (mock_csv, expected, total_duration):
        dataset = labeled_video_dataset(
            data_path=mock_csv,
            clip_sampler=make_clip_sampler("uniform", total_duration),
            video_sampler=RandomSampler,
            decode_audio=False,
            decoder=decoder,
        )

        # Two complete passes over the same dataset object.
        for _pass in range(2):
            collected = []
            for sample in dataset:
                collected.append((sample["label"], sample["video"]))
            assert_unordered_list_compare_true(self, expected, collected)
def test_sampling_with_non_divisible_processes_by_clips(self, decoder):
    """
    Two DataLoader workers over five clips in total still yield every clip.

    A 15-frame and a 10-frame video are sampled uniformly with a clip
    duration just under half of the 10-frame video's duration, giving three
    and two clips respectively. The clips collected through the DataLoader
    are compared, order-independently, with the expected slices along axis 1.
    """
    # Make one video with 15 frames and one with 10 frames, producing 3 clips and 2
    # clips respectively.
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
        video_file_name_1,
        data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_2,
            data_2,
        ):
            # The listing file is closed on leaving the ``with`` so it can be read back.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name_1} 0\n".encode())
                f.write(f"{video_file_name_2} 1\n".encode())

            total_duration = num_frames / fps
            half_duration = total_duration / 2 - self._EPS
            clip_sampler = make_clip_sampler("uniform", half_duration)
            labeled_video_paths = LabeledVideoPaths.from_path(f.name)
            dataset = LabeledVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            half_frames = num_frames // 2
            # A list, like the sibling tests: a set of tuples holding tensors
            # would hash by identity and iterate in arbitrary order.
            expected = [
                (0, data_1[:, :half_frames]),  # 1/3 clip
                (0, data_1[:, half_frames:half_frames * 2]),  # 2/3 clip
                (0, data_1[:, half_frames * 2:]),  # 3/3 clip
                (1, data_2[:, :half_frames]),  # First half
                (1, data_2[:, half_frames:]),  # Second half
            ]

            test_dataloader = DataLoader(dataset, batch_size=None, num_workers=2)
            actual = [(sample["label"], sample["video"]) for sample in test_dataloader]
            assert_unordered_list_compare_true(self, expected, actual)
def test_constant_clips_per_video_sampling_works(self, decoder):
    """
    The "constant_clips_per_video" sampler yields two evenly spaced clips per video.

    For a 15-frame and a 10-frame video, each sample must be a two-frame
    clip: the first starting at frame 0, the second starting at
    ``ceil((frames - clip_frames) / 2)``.
    """
    # Make one video with 15 frames and one with 10 frames.
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
        video_file_name_1,
        data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_2,
            data_2,
        ):
            # The listing file is closed on leaving the ``with`` so it can be read back.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as listing:
                listing.write(f"{video_file_name_1} 0\n".encode())
                listing.write(f"{video_file_name_2} 1\n".encode())

            clip_frames = 2
            duration_for_frames = clip_frames / fps - self._EPS
            sampler = make_clip_sampler(
                "constant_clips_per_video", duration_for_frames, 2
            )
            video_paths = LabeledVideoPaths.from_path(listing.name)
            dataset = LabeledVideoDataset(
                video_paths,
                clip_sampler=sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Dataset has 2 videos. Each video has two evenly spaced clips of size
            # clip_frames sampled. The first clip of each video will always be
            # sampled at second 0. The second clip of the video is the next frame
            # from time: (total_duration - clip_duration) / 2
            references = []
            for label, video in ((0, data_1), (1, data_2)):
                second_start = math.ceil((video.shape[1] - clip_frames) / 2)
                references.append((label, video[:, :clip_frames]))
                references.append(
                    (label, video[:, second_start:second_start + clip_frames])
                )

            for position, sample in enumerate(dataset):
                want_label, want_video = references[position]
                self.assertTrue(sample["video"].equal(want_video))
                self.assertEqual(sample["label"], want_label)
def test_single_clip_per_video_works(self, decoder):
    """
    A clip spanning the whole video yields each mock video once per pass.

    The dataset is read through a two-worker ``DataLoader`` for two passes,
    and each pass is compared, order-independently, with the expected samples.
    """
    with mock_encoded_video_dataset_file() as (mock_csv, expected, total_duration):
        dataset = labeled_video_dataset(
            data_path=mock_csv,
            clip_sampler=make_clip_sampler("uniform", total_duration),
            video_sampler=SequentialSampler,
            decode_audio=False,
            decoder=decoder,
        )
        loader = DataLoader(dataset, batch_size=None, num_workers=2)

        # Two complete passes over the same loader.
        for _pass in range(2):
            collected = []
            for sample in loader:
                collected.append((sample["label"], sample["video"]))
            assert_unordered_list_compare_true(self, expected, collected)
def test_multiple_clips_per_video_works(self):
    """
    A 0.033 second uniform clip splits each Charades video into one-frame clips.

    The samples are expected in video order, then frame order, each carrying
    a single-frame label list and the matching one-frame slice along axis 1.
    """
    with temp_charades_dataset() as (filename, video_1, video_2):
        # Expects each clip to have 1 frame each.
        sampler = make_clip_sampler("uniform", 0.033)
        dataset = Charades(
            filename,
            clip_sampler=sampler,
            video_sampler=SequentialSampler,
        )

        references = [
            ([[label]], video[:, frame:frame + 1])
            for label, video in ((0, video_1), (1, video_2))
            for frame in range(3)
        ]
        for sample, (want_labels, want_video) in zip(dataset, references):
            self.assertEqual(sample["label"], want_labels)
            self.assertTrue(sample["video"].equal(want_video))
def test_random_clip_sampling_works(self, decoder):
    """
    Random clips of just under half the video duration have 5 frames.

    Every sample's size along axis 1 must be 5, and labels must follow the
    order of the mock video list.
    """
    with mock_encoded_video_dataset_file() as (
        mock_csv,
        label_videos,
        total_duration,
    ):
        half_duration = total_duration / 2 - self._EPS
        sampler = make_clip_sampler("random", half_duration)
        video_paths = LabeledVideoPaths.from_path(mock_csv)
        dataset = LabeledVideoDataset(
            video_paths,
            clip_sampler=sampler,
            video_sampler=SequentialSampler,
            decode_audio=False,
            decoder=decoder,
        )

        want_frames = 5
        want_labels = [label for label, _video in label_videos]
        for position, sample in enumerate(dataset):
            self.assertEqual(sample["video"].shape[1], want_frames)
            self.assertEqual(sample["label"], want_labels[position])
def test_multiple_videos(self):
    """
    ``Ava`` returns the clip, boxes and labels of each of two annotated videos.

    Two label rows (random bounding box, action id, iou score) are written
    per video at timestamp 902. Each sample must contain frames 45..74 of
    its video, both bounding boxes, and both action ids as one-element lists.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as data_file:
        with temp_ava_dataset_2_videos() as (
            frame_paths_file,
            video_1,
            video_2,
            video_1_name,
            video_2_name,
        ):
            # Per video: the (action id, iou score) pairs of its two boxes.
            plan = (
                (video_1_name, video_1, ((1, 0.85), (2, 0.4))),
                (video_2_name, video_2, ((3, 0.95), (4, 0.9))),
            )

            references = []
            for video_name, video, scored_actions in plan:
                # Draw both boxes of a video before writing its rows.
                boxes = [get_random_bbox() for _ in scored_actions]
                for (_box, box_string), (action, iou) in zip(boxes, scored_actions):
                    row = f"{video_name},902,{box_string},{action},{iou}\n"
                    data_file.write(row.encode())
                references.append(
                    (
                        video,
                        [box for box, _string in boxes],
                        [[action] for action, _iou in scored_actions],
                    )
                )

            # Close explicitly so the rows are on disk before the dataset reads them.
            data_file.close()

            dataset = Ava(
                frame_paths_file=frame_paths_file,
                frame_labels_file=data_file.name,
                clip_sampler=make_clip_sampler("random", 1.0),
            )

            # All videos are of the form cthw and fps is 30.
            # The clip is sampled at time step = 2 secs in the video.
            for video, want_boxes, want_labels in references:
                sample = next(dataset)
                self.assertTrue(sample["video"].equal(video[:, 45:75, :, :]))
                self.assertTrue(
                    torch.tensor(sample["boxes"]).equal(torch.tensor(want_boxes))
                )
                self.assertTrue(
                    torch.tensor(sample["labels"]).equal(torch.tensor(want_labels))
                )
def from_paths(
    cls,
    train_data_path: Optional[Union[str, pathlib.Path]] = None,
    val_data_path: Optional[Union[str, pathlib.Path]] = None,
    test_data_path: Optional[Union[str, pathlib.Path]] = None,
    predict_data_path: Union[str, pathlib.Path] = None,
    clip_sampler: Union[str, 'ClipSampler'] = "random",
    clip_duration: float = 2,
    clip_sampler_kwargs: Dict[str, Any] = None,
    video_sampler: Type[Sampler] = RandomSampler,
    decode_audio: bool = True,
    decoder: str = "pyav",
    train_transform: Optional[Dict[str, Callable]] = None,
    val_transform: Optional[Dict[str, Callable]] = None,
    test_transform: Optional[Dict[str, Callable]] = None,
    predict_transform: Optional[Dict[str, Callable]] = None,
    batch_size: int = 4,
    num_workers: Optional[int] = None,
    preprocess: Optional[Preprocess] = None,
    **kwargs,
) -> 'DataModule':
    """
    Build a VideoClassificationData object from folders of videos laid out
    with one sub-folder per class:
    ::

        train/class_x/xxx.ext
        train/class_x/xxy.ext
        train/class_x/xxz.ext

        train/class_y/123.ext
        train/class_y/nsdf3.ext
        train/class_y/asd932_.ext

    Args:
        train_data_path: Path to training folder. Default: None.
        val_data_path: Path to validation folder. Default: None.
        test_data_path: Path to test folder. Default: None.
        predict_data_path: Path to predict folder. Default: None.
        clip_sampler: ClipSampler (or its name) to be used on videos.
        clip_duration: Clip duration for the clip sampler.
        clip_sampler_kwargs: Extra ClipSampler keyword arguments.
        video_sampler: Sampler for the internal video container. This defines
            the order videos are decoded and, if necessary, the distributed split.
        decode_audio: Whether to decode the audio with the video clip.
        decoder: Defines what type of decoder used to decode a video.
        train_transform: Video clip dictionary transform to use for training set.
        val_transform: Video clip dictionary transform to use for validation set.
        test_transform: Video clip dictionary transform to use for test set.
        predict_transform: Video clip dictionary transform to use for predict set.
        batch_size: Batch size for data loading.
        num_workers: The number of workers to use for parallelized loading.
            Defaults to ``None`` which equals the number of available CPU threads.
        preprocess: VideoClassifierPreprocess to handle the data processing.
            When falsy, one is created from ``cls.preprocess_cls`` using the
            sampler, decoding and transform arguments above.
        **kwargs: Forwarded to ``cls.from_load_data_inputs``.

    Returns:
        VideoClassificationData: the constructed data module

    Raises:
        ModuleNotFoundError: If pytorchvideo is not available.
        MisconfigurationException: If ``clip_sampler`` is falsy.

    Examples:
        >>> videos = VideoClassificationData.from_paths("train/") # doctest: +SKIP

    """
    if not _PYTORCHVIDEO_AVAILABLE:
        raise ModuleNotFoundError("Please, run `pip install pytorchvideo`.")

    extra_sampler_kwargs = clip_sampler_kwargs if clip_sampler_kwargs else {}

    if not clip_sampler:
        raise MisconfigurationException(
            "clip_sampler should be provided as a string or ``pytorchvideo.data.clip_sampling.ClipSampler``"
        )

    built_sampler = make_clip_sampler(clip_sampler, clip_duration, **extra_sampler_kwargs)

    # Only build a preprocess when the caller did not supply a usable one.
    if not preprocess:
        preprocess = cls.preprocess_cls(
            built_sampler,
            video_sampler,
            decode_audio,
            decoder,
            train_transform,
            val_transform,
            test_transform,
            predict_transform,
        )

    return cls.from_load_data_inputs(
        train_load_data_input=train_data_path,
        val_load_data_input=val_data_path,
        test_load_data_input=test_data_path,
        predict_load_data_input=predict_data_path,
        batch_size=batch_size,
        num_workers=num_workers,
        preprocess=preprocess,
        use_iterable_auto_dataset=True,
        **kwargs,
    )
def test_reading_from_directory_structure_hmdb51(self, decoder):
    """
    ``Hmdb51`` reads videos listed in per-action split files.

    One losslessly encoded video is created for each of two actions, along
    with a ``<action>_test_split1.txt`` file listing it with split value 1.
    The two samples may arrive in either order; each must carry its action
    name as the label, five meta tags, and the original frames.
    """
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_path = pathlib.Path(root_dir)
        action_1 = "running"
        action_2 = "cleaning_windows"

        videos_dir = root_path / "videos"
        videos_dir.mkdir()

        action_1_dir = videos_dir / action_1
        action_1_dir.mkdir()
        frames_1 = create_dummy_video_frames(15, 10, 10)

        action_2_dir = videos_dir / action_2
        action_2_dir.mkdir()
        frames_2 = create_dummy_video_frames(20, 15, 15)

        splits_dir = root_path / "folds"
        splits_dir.mkdir()

        with tempfile.NamedTemporaryFile(
                suffix="_u_nm_np1_ba_goo_19.avi", dir=action_1_dir) as file_1, \
                tempfile.NamedTemporaryFile(
                    suffix="_u_nm_np1_fr_med_1.avi", dir=action_2_dir) as file_2:
            for handle in (file_1, file_2):
                handle.close()

            # Write lossless video for each class.
            for handle, frames in ((file_1, frames_1), (file_2, frames_2)):
                io.write_video(
                    handle.name,
                    frames,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

            # One split file per action, listing that action's video.
            video_name_1 = os.path.basename(file_1.name)
            video_name_2 = os.path.basename(file_2.name)
            for action, video_name in ((action_1, video_name_1), (action_2, video_name_2)):
                split_path = os.path.join(splits_dir, action + "_test_split1.txt")
                with open(split_path, "w") as split_file:
                    split_file.write(f"{video_name} 1\n")

            sampler = make_clip_sampler("uniform", 3)
            dataset = Hmdb51(
                data_path=splits_dir,
                video_path_prefix=videos_dir,
                clip_sampler=sampler,
                video_sampler=SequentialSampler,
                split_id=1,
                split_type="train",
                decode_audio=False,
                decoder=decoder,
            )

            # The order of the two samples is not relied upon: they are
            # re-paired with their action below before being checked.
            sample_1 = next(dataset)
            sample_2 = next(dataset)
            self.assertTrue(sample_1["label"] in [action_1, action_2])
            if sample_1["label"] == action_2:
                sample_1, sample_2 = sample_2, sample_1

            for sample, action, frames in (
                (sample_1, action_1, frames_1),
                (sample_2, action_2, frames_2),
            ):
                self.assertEqual(sample["label"], action)
                self.assertEqual(5, len(sample["meta_tags"]))
                want_video = thwc_to_cthw(frames).to(torch.float32)
                self.assertTrue(sample["video"].equal(want_video))