def __init__(self,
             dataset_path,
             annotation_path,
             clip_length,
             frame_stride,
             video_transform=None,
             name="<NO_NAME>",
             return_item_subpath=False,
             shuffle_list_seed=None):
    """Configure the validation iterator and index its videos into clips.

    Args:
        dataset_path: Passed to ``_get_video_list`` to locate the videos.
        annotation_path: Passed to ``_get_video_list`` alongside
            ``dataset_path``.
        clip_length: Number of frames kept per clip after striding.
        frame_stride: Stride between kept frames; the raw window handed to
            ``VideoClips`` spans ``clip_length * frame_stride`` frames.
        video_transform: Stored on the instance; not used here.
        name: Label that only appears in the log message.
        return_item_subpath: Stored on the instance; not used here.
        shuffle_list_seed: Seed for ``self.rng``; ``None`` (or any falsy
            value) falls back to 0.
    """
    super(VideoIterVal, self).__init__()

    # Plain configuration copied from the arguments.
    self.frames_stride = frame_stride
    self.dataset_path = dataset_path
    self.video_transform = video_transform
    self.return_item_subpath = return_item_subpath

    # The RNG is created before the video list is loaded, as in the
    # original ordering, in case the list helper relies on it.
    seed = shuffle_list_seed or 0
    self.rng = np.random.RandomState(seed)

    self.video_list = self._get_video_list(
        dataset_path=self.dataset_path,
        annotation_path=annotation_path,
    )

    # Clips do not overlap: the hop between clips equals the window size.
    window = clip_length * frame_stride
    self.total_clip_length_in_frames = window
    self.video_clips = VideoClips(
        video_paths=self.video_list,
        clip_length_in_frames=window,
        frames_between_clips=window,
    )

    message = "VideoIter:: iterator initialized (phase: '{:s}', num: {:d})".format(
        name, len(self.video_list))
    logging.info(message)
def test_compute_clips_for_video(self):
    """Check clip computation for one clip, several clips, and too few frames."""
    pts = torch.arange(30)
    source_fps = 30
    seconds = float(len(pts)) / source_fps

    # case 1: single clip
    clip_len, target_fps = 13, 13
    clips, idxs = VideoClips.compute_clips_for_video(
        pts, clip_len, clip_len, source_fps, target_fps)
    expected = VideoClips._resample_video_idx(
        int(seconds * target_fps), source_fps, target_fps)
    assert len(clips) == 1
    assert_equal(clips, idxs)
    assert_equal(idxs[0], expected)

    # case 2: all frames appear only once
    clip_len, target_fps = 4, 12
    clips, idxs = VideoClips.compute_clips_for_video(
        pts, clip_len, clip_len, source_fps, target_fps)
    expected = VideoClips._resample_video_idx(
        int(seconds * target_fps), source_fps, target_fps)
    assert len(clips) == 3
    assert_equal(clips, idxs)
    assert_equal(idxs.flatten(), expected)

    # case 3: frames aren't enough for a clip
    clip_len, target_fps = 32, 13
    with pytest.warns(UserWarning):
        clips, idxs = VideoClips.compute_clips_for_video(
            pts, clip_len, clip_len, source_fps, target_fps)
    assert len(clips) == 0
    assert len(idxs) == 0
def __init__(self,
             root,
             frames_per_clip,
             step_between_clips=1,
             frame_rate=None,
             extensions=('mp4', ),
             transform=None,
             cached=None,
             _precomputed_metadata=None):
    """Index a class-per-directory video tree into fixed-length clips.

    Args:
        root: Directory whose sub-directories (via ``list_dir``) are the
            class names.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        extensions: File extensions handed to ``make_dataset``.
        transform: Stored on the instance; not used here.
        cached: Accepted but not used by this constructor.
        _precomputed_metadata: Forwarded to ``VideoClips``.
    """
    super(Kinetics400, self).__init__(root)

    class_names = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(class_names)}

    self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
    self.classes = class_names

    video_paths = [sample[0] for sample in self.samples]
    self.video_clips = VideoClips(
        video_paths,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
    )
    self.transform = transform
def test_compute_clips_for_video(self):
    """Check clip computation when one clip fits and when several fit."""
    pts = torch.arange(30)
    source_fps = 30
    seconds = float(len(pts)) / source_fps

    # case 1: single clip
    clip_len, target_fps = 13, 13
    clips, idxs = VideoClips.compute_clips_for_video(
        pts, clip_len, clip_len, source_fps, target_fps)
    expected = VideoClips._resample_video_idx(
        int(seconds * target_fps), source_fps, target_fps)
    self.assertEqual(len(clips), 1)
    self.assertTrue(clips.equal(idxs))
    self.assertTrue(idxs[0].equal(expected))

    # case 2: all frames appear only once
    clip_len, target_fps = 4, 12
    clips, idxs = VideoClips.compute_clips_for_video(
        pts, clip_len, clip_len, source_fps, target_fps)
    expected = VideoClips._resample_video_idx(
        int(seconds * target_fps), source_fps, target_fps)
    self.assertEqual(len(clips), 3)
    self.assertTrue(clips.equal(idxs))
    self.assertTrue(idxs.flatten().equal(expected))
def __init__(self, clip_length, frame_stride, dataset_path=None,
             video_transform=None, return_label=False):
    """Configure the iterator and load (or build and cache) its clip index.

    Args:
        clip_length: Number of frames kept per clip after striding.
        frame_stride: Stride between kept frames; the raw window handed to
            ``VideoClips`` spans ``clip_length * frame_stride`` frames.
        dataset_path: Passed to ``_get_video_list`` to locate the videos.
        video_transform: Stored on the instance; not used here.
        return_label: Stored on the instance; not used here.
    """
    super(VideoIter, self).__init__()

    # video clip properties
    self.frames_stride = frame_stride
    self.total_clip_length_in_frames = clip_length * frame_stride
    self.video_transform = video_transform

    # IO
    self.dataset_path = dataset_path
    self.video_list = self._get_video_list(dataset_path=self.dataset_path)
    self.return_label = return_label

    # data loading
    # NOTE(review): the cache lives at a fixed path relative to the current
    # working directory and is not keyed on dataset_path or the clip
    # parameters, so a stale file is reused silently. It is also unpickled
    # as-is; only use cache files that this code wrote itself.
    cache_path = 'video_clips.file'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as fp:
            self.video_clips = pickle.load(fp)
    else:
        self.video_clips = VideoClips(
            video_paths=self.video_list,
            clip_length_in_frames=self.total_clip_length_in_frames,
            frames_between_clips=self.total_clip_length_in_frames,
        )
        # Write the cache in the same branch that built the index, instead
        # of probing the filesystem a second time afterwards.
        with open(cache_path, 'wb') as fp:
            pickle.dump(self.video_clips, fp, protocol=pickle.HIGHEST_PROTOCOL)
def __init__(self,
             root,
             annotation_path,
             frames_per_clip,
             step_between_clips=1,
             fold=1,
             train=True,
             framewiseTransform=False,
             transform=None):
    """Index the videos under ``root`` and keep only the requested fold.

    Args:
        root: Directory whose sub-directories (via ``list_dir``) are the
            class names.
        annotation_path: Passed to ``_select_fold``.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        fold: Split number; must be 1, 2 or 3.
        train: Passed to ``_select_fold`` and stored on the instance.
        framewiseTransform: Stored on the instance; not used here.
        transform: Stored on the instance; not used here.

    Raises:
        ValueError: If ``fold`` is outside the range 1..3.
    """
    super(HMDB51, self).__init__(root)

    if not 1 <= fold <= 3:
        raise ValueError("fold should be between 1 and 3, got {}".format(fold))

    self.fold = fold
    self.train = train

    class_names = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(class_names)}
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ), is_valid_file=None)
    self.classes = class_names

    all_videos = [sample[0] for sample in self.samples]
    # Clips are computed for every video first, then narrowed to the fold.
    all_clips = VideoClips(all_videos, frames_per_clip, step_between_clips)
    self.indices = self._select_fold(all_videos, annotation_path, fold, train)
    self.video_clips = all_clips.subset(self.indices)
    self.video_list = [all_videos[position] for position in self.indices]

    self.framewiseTransform = framewiseTransform
    self.transform = transform
def __init__(self, clip_length, frame_stride, frame_rate=None, dataset_path=None,
             spatial_transform=None, temporal_transform=None, return_label=False,
             video_formats=["avi", "mp4"]):
    """Configure the dataset and index its videos into non-overlapping clips.

    Args:
        clip_length: Number of frames kept per clip after striding.
        frame_stride: Stride between kept frames; the raw window handed to
            ``VideoClips`` spans ``clip_length * frame_stride`` frames.
        frame_rate: Forwarded to ``VideoClips``.
        dataset_path: Passed to ``_get_video_list`` to locate the videos.
        spatial_transform: Stored on the instance; not used here.
        temporal_transform: Stored on the instance; not used here.
        return_label: Stored on the instance; not used here.
        video_formats: Video file extensions, stored on the instance before
            the video list is built.
    """
    super(VideoDataset, self).__init__()

    # video clip properties
    self.frames_stride = frame_stride
    self.total_clip_length_in_frames = clip_length * frame_stride
    self.spatial_transform = spatial_transform
    self.temporal_transform = temporal_transform
    # The signature default is a single list object shared by every call.
    # Keep a private copy so mutating one instance's formats cannot leak
    # into other instances. Non-list values are stored untouched.
    if isinstance(video_formats, list):
        self.video_formats = list(video_formats)
    else:
        self.video_formats = video_formats

    # IO
    self.dataset_path = dataset_path
    self.video_list = self._get_video_list(dataset_path=self.dataset_path)
    self.return_label = return_label

    # data loading
    self.video_clips = VideoClips(
        video_paths=self.video_list,
        clip_length_in_frames=self.total_clip_length_in_frames,
        frames_between_clips=self.total_clip_length_in_frames,
        frame_rate=frame_rate,
    )
def __init__(self,
             root,
             train,
             frames_per_clip=16,
             step_between_clips=1,
             frame_rate=16,
             transform=None,
             extensions=('mp4',),
             label_fn=lambda x, *_: x,
             local_rank=-1,
             get_label_only=False):
    """Load the cached clip index for a split, building it when absent.

    Args:
        root: Dataset root; the ``train`` or ``val`` sub-directory is used.
        train: Selects the ``train`` split when truthy, otherwise ``val``.
        frames_per_clip: Forwarded to ``VideoClips``; part of the cache name.
        step_between_clips: Forwarded to ``VideoClips``; part of the cache
            name.
        frame_rate: Forwarded to ``VideoClips``; part of the cache name.
        transform: Stored on the instance; not used here.
        extensions: File extensions searched recursively under the split
            directory.
        label_fn: Function bound to the instance as a method.
        local_rank: Only ranks ``<= 0`` print the "generating" message.
        get_label_only: Stored on the instance; not used here.
    """
    split = 'train' if train else 'val'
    root = os.path.join(root, split)
    self.root = root
    super().__init__(root)

    self.transform = transform
    # Function that takes in __getitem__ idx and returns auxiliary label
    # information in the form of a tensor
    self.label_fn = MethodType(label_fn, self)
    self.get_label_only = get_label_only

    cache_name = f'clips_{split}_{frames_per_clip}_{step_between_clips}_{frame_rate}.pt'
    clips_fn = os.path.join(root, cache_name)
    try:
        self.video_clips = torch.load(clips_fn)
    except FileNotFoundError:
        matches = (Path(root).rglob(f'*.{ext}') for ext in extensions)
        video_list = [str(path) for path in itertools.chain.from_iterable(matches)]
        random.shuffle(video_list)
        # NOTE(review): only the message is rank-gated; every rank builds
        # and saves the index. Confirm this nesting against the original
        # file's indentation.
        if local_rank <= 0:
            print('Generating video clips file: ' + clips_fn)
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            num_workers=32,
        )
        torch.save(self.video_clips, clips_fn)

    # Set after both the load and build paths, so it is not part of the
    # saved file.
    self.video_clips.clip_sizes = torch.as_tensor(
        [len(video) for video in self.video_clips.clips])
def __init__(self, video_paths, clip_length_in_frames, stride, frame_rate,
             refresh, cache_dir):
    """Load a pickled ``VideoClips`` index from the cache, or build and cache it.

    The cache file name combines the frame rate, clip length, stride, the
    number of videos and a SHA-256 digest of the sorted, ``-``-joined paths.

    Args:
        video_paths: Paths of the videos to index.
        clip_length_in_frames: Forwarded to ``VideoClips``.
        stride: Passed to ``VideoClips`` as ``frames_between_clips``.
        frame_rate: Forwarded to ``VideoClips``.
        refresh: When truthy, rebuild even if a cached file exists.
        cache_dir: Directory that holds the cache file; created if needed.
    """
    self.frame_rate = frame_rate
    self.clip_length_in_frames = clip_length_in_frames
    self.stride = stride
    self.video_paths = video_paths

    joined_paths = '-'.join(sorted(video_paths))
    digest = hashlib.sha256(joined_paths.encode("utf-8")).hexdigest()
    cache_name = (
        f"fps-{frame_rate}-clip_length-{clip_length_in_frames}-stride{stride}"
        f"num-videos{len(video_paths)}-{digest}"
    )
    cached_clips_path = Path(cache_dir) / cache_name

    if not cached_clips_path.exists() or refresh:
        print("Building new video clips object")
        self.video_clips = VideoClips(
            frame_rate=frame_rate,
            video_paths=video_paths,
            frames_between_clips=stride,
            clip_length_in_frames=clip_length_in_frames,
        )
        cached_clips_path.parent.mkdir(exist_ok=True, parents=True)
        print(f"Writing object to cache at {cached_clips_path}")
        with open(cached_clips_path, "wb") as handle:
            pickle.dump(self.video_clips, handle)
    else:
        print("Reloading cached clips object")
        with open(cached_clips_path, "rb") as handle:
            self.video_clips = pickle.load(handle)
def __init__(self,
             root,
             annotation_path,
             frames_per_clip,
             step_between_clips=1,
             frame_rate=None,
             fold=1,
             train=True,
             transform=None,
             _precomputed_metadata=None,
             num_workers=1,
             _video_width=0,
             _video_height=0,
             _video_min_dimension=0,
             _audio_samples=0):
    """Index the videos under ``root``, dump their metadata, keep one fold.

    Args:
        root: Directory whose sub-directories (via ``list_dir``) are the
            class names; the metadata pickle is also written here.
        annotation_path: Passed to ``_select_fold``.
        frames_per_clip: Forwarded to ``VideoClips``; part of the metadata
            file name.
        step_between_clips: Forwarded to ``VideoClips``; part of the
            metadata file name.
        frame_rate: Forwarded to ``VideoClips``.
        fold: Split number; must be 1, 2 or 3.
        train: Passed to ``_select_fold`` and stored on the instance.
        transform: Stored on the instance; not used here.
        _precomputed_metadata: Forwarded to ``VideoClips``.
        num_workers, _video_width, _video_height, _video_min_dimension,
        _audio_samples: Forwarded to ``VideoClips`` unchanged.

    Raises:
        ValueError: If ``fold`` is outside the range 1..3.
    """
    super(MYUCF101, self).__init__(root)

    if not 1 <= fold <= 3:
        raise ValueError("fold should be between 1 and 3, got {}".format(fold))

    self.fold = fold
    self.train = train

    class_names = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(class_names)}
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ), is_valid_file=None)
    self.classes = class_names

    all_videos = [sample[0] for sample in self.samples]
    all_clips = VideoClips(
        all_videos,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )

    # The metadata is written once; an existing file is never overwritten
    # and is not read back here.
    metadata_name = (
        f"meta_data_train_{train}_fold_{fold}_frames_{frames_per_clip}_skip_"
        f"{step_between_clips}.pickle"
    )
    metadata_path = os.path.join(root, metadata_name)
    if not os.path.exists(metadata_path):
        with open(metadata_path, 'wb') as handle:
            pickle.dump(all_clips.metadata, handle)

    self.video_clips_metadata = all_clips.metadata
    self.indices = self._select_fold(all_videos, annotation_path, fold, train)
    self.video_clips = all_clips.subset(self.indices)
    self.transform = transform
def __init__(self,
             root,
             annotation_path,
             frames_per_clip,
             step_between_clips=1,
             frame_rate=None,
             fold=1,
             train=True,
             transform=None,
             _precomputed_metadata=None,
             num_workers=1,
             _video_width=0,
             _video_height=0,
             _video_min_dimension=0,
             _audio_samples=0):
    """Index the videos under ``root`` with a metadata cache, keep one fold.

    Metadata is read from ``<root>/ucf101_metadata.pt`` when that file
    exists and written there otherwise. ``_precomputed_metadata`` is
    accepted but not used by this constructor.

    Args:
        root: Directory whose sub-directories (via ``list_dir``) are the
            class names.
        annotation_path: Passed to ``_select_fold``.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        fold: Split number; must be 1, 2 or 3.
        train: Passed to ``_select_fold`` and stored on the instance.
        transform: Stored on the instance; not used here.
        num_workers, _video_width, _video_height, _video_min_dimension,
        _audio_samples: Forwarded to ``VideoClips`` unchanged.

    Raises:
        ValueError: If ``fold`` is outside the range 1..3.
    """
    super(UCF101, self).__init__(root)

    if not 1 <= fold <= 3:
        raise ValueError("fold should be between 1 and 3, got {}".format(fold))

    self.fold = fold
    self.train = train

    class_names = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(class_names)}
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ), is_valid_file=None)
    self.classes = class_names

    all_videos = [sample[0] for sample in self.samples]

    metadata_filepath = os.path.join(root, 'ucf101_metadata.pt')
    cached_metadata = (
        torch.load(metadata_filepath) if os.path.exists(metadata_filepath) else None
    )

    all_clips = VideoClips(
        all_videos,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        cached_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )

    if not os.path.exists(metadata_filepath):
        torch.save(all_clips.metadata, metadata_filepath)

    self.video_clips_metadata = all_clips.metadata
    self.indices = self._select_fold(all_videos, annotation_path, fold, train)
    self.video_clips = all_clips.subset(self.indices)
    self.transform = transform
def test_video_clips_custom_fps(self, tmpdir):
    """Every clip has the requested length and reports the requested fps."""
    video_list = get_list_of_videos(
        tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
    clip_len = 4
    for target_fps in (1, 3, 4, 10):
        video_clips = VideoClips(
            video_list, clip_len, clip_len, target_fps, num_workers=2)
        for clip_index in range(video_clips.num_clips()):
            video, audio, info, video_idx = video_clips.get_clip(clip_index)
            assert video.shape[0] == clip_len
            assert info["video_fps"] == target_fps
def test_video_clips_custom_fps(self):
    """Every clip has the requested length and reports the requested fps."""
    clip_len = 4
    with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list:
        for target_fps in (1, 3, 4, 10):
            video_clips = VideoClips(video_list, clip_len, clip_len, target_fps)
            for clip_index in range(video_clips.num_clips()):
                video, audio, info, video_idx = video_clips.get_clip(clip_index)
                self.assertEqual(video.shape[0], clip_len)
                self.assertEqual(info["video_fps"], target_fps)
class Mice(VisionDataset):
    """Clip-level dataset over a class-per-directory video tree.

    Each item is ``(video, label, video_idx, clip_idx)``.
    """

    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=("mp4", ),
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0,
                 _audio_channels=0):
        """Scan ``root`` for class directories and index every video into clips.

        All clip-related arguments are forwarded to ``VideoClips`` unchanged;
        ``extensions`` is handed to ``make_dataset``.
        """
        super(Mice, self).__init__(root)

        class_names = sorted(list_dir(root))
        class_to_idx = {label: position for position, label in enumerate(class_names)}
        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
        self.classes = class_names

        video_paths = [sample[0] for sample in self.samples]
        self.video_clips = VideoClips(
            video_paths,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
            _audio_channels=_audio_channels,
        )
        self.transform = transform

    @property
    def metadata(self):
        """Metadata held by the underlying ``VideoClips`` object."""
        return self.video_clips.metadata

    def __len__(self):
        """Total number of clips across all videos."""
        return self.video_clips.num_clips()

    def __getitem__(self, idx):
        """Return ``(video, label, video_idx, clip_idx)`` for clip ``idx``."""
        video, _, _, _ = self.video_clips.get_clip(idx)
        # The video index comes from get_clip_location, which also gives
        # the clip's position within that video.
        video_idx, clip_idx = self.video_clips.get_clip_location(idx)
        label = self.samples[video_idx][1]

        if self.transform is not None:
            video = self.transform(video)

        return video, label, video_idx, clip_idx
def DownsampleClipSampler(video_clips: "VideoClips", labels: List[int]):
    """Build a sampler that draws the same number of clips from every class.

    Each clip takes the label of the video it belongs to. Every class is
    then downsampled, without replacement, to the size of the smallest one.

    Args:
        video_clips: Object exposing ``num_clips()`` and
            ``get_clip_location(idx)``, whose first element is the video
            index.
        labels: One label per video, indexed by video index.

    Returns:
        A ``SubsetRandomSampler`` over the selected clip indices.

    Raises:
        ValueError: If ``labels`` is empty (there is no smallest class).
    """
    # Group clip indices by label in one pass, instead of rescanning the
    # full clip list once per class.
    clips_by_label = {}
    for clip_idx in range(video_clips.num_clips()):
        video_idx = video_clips.get_clip_location(clip_idx)[0]
        clips_by_label.setdefault(labels[video_idx], []).append(clip_idx)

    # A label whose videos produced no clips counts as an empty class, so
    # the per-class quota (and the whole sampler) becomes empty.
    classes = set(labels)
    per_class = min(len(clips_by_label.get(label, ())) for label in classes)

    indices = []
    for label in classes:
        indices += random.sample(clips_by_label.get(label, []), per_class)
    return SubsetRandomSampler(indices)
def BalancedClipSampler(video_clips: VideoClips, clip_labels: List[int],
                        num_samples=None, log_weight=False):
    """Build a ``BalancedSampler`` over clips, labelled by their source video.

    Args:
        video_clips: Clip index; ``len(video_clips.clips)`` must equal
            ``len(clip_labels)``.
        clip_labels: One label per video, indexed by video index.
        num_samples: Forwarded to ``BalancedSampler``; defaults to the
            number of video paths.
        log_weight: Forwarded to ``BalancedSampler``.
    """
    assert len(video_clips.clips) == len(clip_labels)

    per_clip_labels = []
    for clip_idx in range(video_clips.num_clips()):
        video_idx = video_clips.get_clip_location(clip_idx)[0]
        per_clip_labels.append(clip_labels[video_idx])

    if num_samples is None:
        num_samples = len(video_clips.video_paths)

    return BalancedSampler(per_clip_labels, num_samples, log_weight)
def init_data(self,
              root,
              frames_per_clip,
              step_between_clips=6,
              frame_rate=6,
              train=True,
              transform=None,
              _precomputed_metadata=None,
              num_workers=1,
              _video_width=0,
              _video_height=0,
              _video_min_dimension=0,
              _audio_samples=0):
    """Scan the train or test sub-tree of ``root`` and index its videos.

    Every regular file found under a class directory becomes a sample; no
    extension filter is applied here.

    Args:
        root: Dataset root; ``"/train"`` or ``"/test"`` is appended to it.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        train: Chooses the ``train`` sub-tree when truthy, else ``test``.
        transform: Stored on the instance; not used here.
        _precomputed_metadata, num_workers, _video_width, _video_height,
        _video_min_dimension, _audio_samples: Forwarded to ``VideoClips``.
    """
    super(HMDB51, self).__init__(root)

    root = root + ("/train" if train else "/test")

    classes = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(classes)}
    print(class_to_idx)

    # Walk each class directory in sorted order so that sample order is
    # deterministic.
    self.samples = []
    for label in sorted(class_to_idx.keys()):
        position = class_to_idx[label]
        class_dir = os.path.join(root, label)
        for current_dir, _, file_names in sorted(os.walk(class_dir, followlinks=True)):
            for file_name in sorted(file_names):
                candidate = os.path.join(current_dir, file_name)
                if os.path.isfile(candidate):
                    self.samples.append((candidate, position))

    video_paths = [path for (path, _) in self.samples]
    all_clips = VideoClips(
        video_paths,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )

    self.train = train
    self.classes = classes
    self.video_clips_metadata = all_clips.metadata
    self.indices = self.get_indices(video_paths)
    self.video_clips = all_clips.subset(self.indices)
    self.transform = transform
class MyVideoDataset(object):
    """Minimal clip dataset that yields ``(video, audio)`` pairs."""

    def __init__(self, video_paths):
        """Index ``video_paths`` into 16-frame clips, hop 1, at 15 fps."""
        self.video_clips = VideoClips(
            video_paths,
            clip_length_in_frames=16,
            frames_between_clips=1,
            frame_rate=15,
        )

    def __getitem__(self, idx):
        """Return the video and audio of clip ``idx``; the rest is dropped."""
        clip = self.video_clips.get_clip(idx)
        video, audio, _info, _video_idx = clip
        return video, audio

    def __len__(self):
        """Total number of clips in the index."""
        return self.video_clips.num_clips()
def BalancedPathSampler(video_clips: VideoClips, clip_labels: List[int],
                        num_samples=None, log_weight=False):
    """Build a ``BalancedSampler`` keyed on ``(label, video path)`` per clip.

    Args:
        video_clips: Clip index; ``len(video_clips.clips)`` must equal
            ``len(clip_labels)``.
        clip_labels: One label per video, indexed by video index.
        num_samples: Forwarded to ``BalancedSampler``; defaults to the
            number of video paths.
        log_weight: Forwarded to ``BalancedSampler``.
    """
    assert len(video_clips.clips) == len(clip_labels)

    video_of_clip = (
        video_clips.get_clip_location(clip_idx)[:2]
        for clip_idx in range(video_clips.num_clips())
    )
    keys = [
        (clip_labels[video_idx], video_clips.video_paths[video_idx])
        for video_idx, _ in video_of_clip
    ]

    if num_samples is None:
        num_samples = len(video_clips.video_paths)

    return BalancedSampler(keys, num_samples, log_weight)
def test_distributed_sampler_and_uniform_clip_sampler(self):
    """Two replicas over a uniform clip sampler each get their expected group."""
    expected_by_rank = (
        (0, [0, 2, 4, 10, 12, 14]),
        (1, [5, 7, 9, 0, 2, 4]),
    )
    with get_list_of_videos(num_videos=3, sizes=[25, 25, 25]) as video_list:
        video_clips = VideoClips(video_list, 5, 5)
        clip_sampler = UniformClipSampler(video_clips, 3)

        for rank, expected in expected_by_rank:
            distributed_sampler = DistributedSampler(
                clip_sampler,
                num_replicas=2,
                rank=rank,
                group_size=3,
            )
            indices = torch.tensor(list(iter(distributed_sampler)))
            self.assertEqual(len(distributed_sampler), 6)
            self.assertTrue(indices.equal(torch.tensor(expected)))
def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir):
    """Two replicas over a uniform clip sampler each get their expected group."""
    video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])
    video_clips = VideoClips(video_list, 5, 5)
    clip_sampler = UniformClipSampler(video_clips, 3)

    expected_by_rank = (
        (0, [0, 2, 4, 10, 12, 14]),
        (1, [5, 7, 9, 0, 2, 4]),
    )
    for rank, expected in expected_by_rank:
        distributed_sampler = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=rank,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler)))
        assert len(distributed_sampler) == 6
        assert_equal(indices, torch.tensor(expected))
def test_uniform_clip_sampler_insufficient_clips(self, tmpdir):
    """A video with fewer clips than requested still yields three indices."""
    clips_per_video = 3
    video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
    video_clips = VideoClips(video_list, 5, 5)

    sampler = UniformClipSampler(video_clips, clips_per_video)
    assert len(sampler) == 3 * 3

    # The first video only has clips 0 and 1, so index 0 shows up twice.
    expected = torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11])
    indices = torch.tensor(list(iter(sampler)))
    assert_equal(indices, expected)
def __init__(self,
             root,
             frames_per_clip,
             step_between_clips=1,
             frame_rate=None,
             extensions=('avi', ),
             transform=None,
             num_workers=1,
             _video_width=0,
             _video_height=0,
             _video_min_dimension=0,
             _audio_samples=0):
    """Index a class-per-directory video tree, caching metadata per split.

    Metadata is read from ``<root>/kinetics_metadata_<split>.pt`` when that
    file exists and written there otherwise. ``<split>`` is the last path
    component of ``root``.

    Args:
        root: Directory whose sub-directories (via ``list_dir``) are the
            class names.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        extensions: File extensions handed to ``make_dataset``.
        transform: Stored on the instance; not used here.
        num_workers, _video_width, _video_height, _video_min_dimension,
        _audio_samples: Forwarded to ``VideoClips`` unchanged.
    """
    super(Kinetics400, self).__init__(root)

    classes = sorted(list_dir(root))
    class_to_idx = {label: position for position, label in enumerate(classes)}
    self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
    self.classes = classes
    video_list = [sample[0] for sample in self.samples]

    # Strip trailing slashes BEFORE splitting. Splitting first turned
    # "data/train/" into an empty split name, so every split given with a
    # trailing slash shared the file "kinetics_metadata_.pt".
    split = root.rstrip('/').split('/')[-1]
    metadata_filepath = os.path.join(root, 'kinetics_metadata_{}.pt'.format(split))

    if os.path.exists(metadata_filepath):
        metadata = torch.load(metadata_filepath)
    else:
        metadata = None

    self.video_clips = VideoClips(
        video_list,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )
    self.transform = transform

    if not os.path.exists(metadata_filepath):
        torch.save(self.video_clips.metadata, metadata_filepath)
def sliding_window(video_path, save_path, epoch_id, preprocess=()):
    """Cut a video into sliding 300-frame clips, augment them, save as mp4.

    One set of random augmentation parameters is drawn per clip and applied
    to all of its frames by every callable in ``preprocess``.

    Args:
        video_path: Path of the source video.
        save_path: Directory the clip files are written to.
        epoch_id: Mixed into the value used to derive each file name.
        preprocess: Callables invoked as
            ``p(frame, rotation=..., scale_factor=..., crop_scale=(y, x))``
            that return the processed frame. Defaults to no preprocessing.

    Returns:
        The written file names (not full paths), in clip order.
    """
    T = 300
    videoclips = VideoClips([video_path], clip_length_in_frames=T, frames_between_clips=1)
    num_clips = len(videoclips)

    filenames = []
    sample_count = -1
    for i in range(num_clips):
        # NOTE(review): hash() of a str is salted per process unless
        # PYTHONHASHSEED is fixed, so these names are not reproducible
        # across runs. Confirm that is acceptable.
        sample_count = hash(
            str(sample_count + 1 + epoch_id * (num_clips / T))) % ((sys.maxsize + 1) * 2)

        # create new preprocess values (draw order matters for seeded runs)
        rnd = np.random.uniform(-1, 1)
        rotation = 5 * rnd
        scale_factor = np.random.uniform(0.8, 1.2)
        crop_scale_y = np.random.uniform(0.5, 1)
        crop_scale_x = np.random.uniform(0.5, 1)

        # Preprocess
        clip, _, _, _ = videoclips.get_clip(i)
        clip = clip.numpy()
        for f in range(len(clip)):
            for p in preprocess:
                clip[f] = p(clip[f],
                            rotation=rotation,
                            scale_factor=scale_factor,
                            crop_scale=(crop_scale_y, crop_scale_x))
        clip = torch.tensor(clip)

        # Save
        filename = "{}.mp4".format(hex(sample_count))
        filepath = join(save_path, filename)
        torchvision.io.write_video(filepath, clip, 30)
        filenames.append(filename)
        print("{}, {}, {}/{}".format(filepath, epoch_id, i, num_clips))

    return filenames
class MyVideoDataset(data.Dataset):
    """Clip dataset over ``<root>/<name>.mp4`` files with optional transforms.

    Each item is ``(idx, video, label, video_idx)``.
    """

    def __init__(self,
                 root,
                 data_dirs,
                 labels,
                 n_frames=30,
                 fps=5,
                 spatial_transform=None,
                 temporal_transform=None,
                 random_slice_size=0):
        """Resolve the video paths and index them into non-overlapping clips.

        Args:
            root: Directory that contains the videos.
            data_dirs: Video names without the ``.mp4`` suffix.
            labels: Labels indexed by video index.
            n_frames: Clip length, also used as the hop between clips.
            fps: Frame rate passed to ``VideoClips``.
            spatial_transform: Optional callable applied after the temporal
                transform.
            temporal_transform: Optional callable applied to the clip.
            random_slice_size: When truthy, ``T.RandomSlice`` of this size
                is applied first.
        """
        self.videos = [os.path.join(root, name + ".mp4") for name in data_dirs]
        self.labels = labels
        self.video_clips = VideoClips(
            self.videos,
            clip_length_in_frames=n_frames,
            frames_between_clips=n_frames,
            frame_rate=fps,
            num_workers=2,
        )
        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform
        # Normalisation is off until set_stats() is called.
        self.data_mean = None
        self.data_std = None
        self.random_slice_size = random_slice_size

    def set_stats(self, mean, std):
        """Store the mean and std used to normalise clips in ``__getitem__``."""
        self.data_mean = mean
        self.data_std = std

    def __getitem__(self, idx):
        """Return ``(idx, video, label, video_idx)`` for clip ``idx``."""
        video, audio, info, video_idx = self.video_clips.get_clip(idx)

        # Order: random slice, temporal, spatial, then normalisation.
        if self.random_slice_size:
            slicer = T.RandomSlice(self.random_slice_size)
            video = slicer(video)
        if self.temporal_transform is not None:
            video = self.temporal_transform(video)
        if self.spatial_transform is not None:
            video = self.spatial_transform(video)
        if self.data_mean is not None and self.data_std is not None:
            normalise = T.Normalize(mean=self.data_mean, std=self.data_std)
            video = normalise(video)

        label = self.labels[video_idx]
        print(video_idx, "--- ", self.video_clips.video_paths[video_idx], "--- ", label)
        return idx, video, label, video_idx

    def __len__(self):
        """Total number of clips in the index."""
        return self.video_clips.num_clips()
def test_uniform_clip_sampler_insufficient_clips(self):
    """A video with fewer clips than requested still yields three indices."""
    clips_per_video = 3
    # The first video only has clips 0 and 1, so index 0 shows up twice.
    expected = torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11])
    with get_list_of_videos(num_videos=3, sizes=[10, 25, 25]) as video_list:
        video_clips = VideoClips(video_list, 5, 5)
        sampler = UniformClipSampler(video_clips, clips_per_video)
        self.assertEqual(len(sampler), 3 * 3)
        indices = torch.tensor(list(iter(sampler)))
        assert_equal(indices, expected)
def __init__(
    self,
    root,
    data_file,
    frames_per_clip,
    step_between_clips=1,
    frame_rate=None,
    extension="mp4",
    transform=None,
    _precomputed_metadata=None,
    num_workers=1,
    _video_width=0,
    _video_height=0,
    _video_min_dimension=0,
    _audio_samples=0,
    _audio_channels=0,
) -> "MiniKinetics200Dataset":
    """Read the sample list from ``data_file`` and index the videos found.

    Each line of ``data_file`` is ``video_id,class_name,class_label``.
    Underscores in the class name become spaces to form the directory
    name. Listed videos that are missing on disk are skipped silently.

    Args:
        root: Directory that holds one sub-directory per class.
        data_file: CSV-style listing described above; must exist.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        extension: Video file extension, without the dot.
        transform: Stored on the instance; not used here.
        _precomputed_metadata, num_workers, _video_width, _video_height,
        _video_min_dimension, _audio_samples, _audio_channels: Forwarded
            to ``VideoClips`` unchanged.
    """
    assert os.path.exists(data_file), f"Data file {data_file} is missing"

    self.samples = []
    with open(data_file, "r") as listing:
        for row in listing:
            video_id, class_name, class_label = row.strip().split(",")
            class_dir = class_name.replace("_", " ")
            video_path = os.path.join(root, class_dir, f"{video_id}.{extension}")
            if not os.path.exists(video_path):
                continue
            self.samples.append([video_path, int(class_label)])

    video_paths = [sample[0] for sample in self.samples]
    self.video_clips = VideoClips(
        video_paths,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
        _audio_channels=_audio_channels,
    )
    self.transform = transform
def __init__(self, transforms=None, train=True, test=False, count_videos=-1,
             count_clips=-1, skip_videoframes=5, num_videoframes=100,
             dist_videoframes=50, video_directory=None, fps=5):
    """List the mp4 files in a directory and load or build their clip index.

    The clip index is cached as a pickle inside ``video_directory``. The
    file name encodes ``count_videos``, the clip length and the hop
    between clips.

    Args:
        transforms: Stored on the instance; not used here.
        train: Stored on the instance; not used here.
        test: Accepted but not used by this constructor.
        count_videos: If > 0, keep only the first this-many videos (sorted
            by path); otherwise use all of them.
        count_clips: Passed to ``_retrieve_valid_datums``.
        skip_videoframes: Multiplied by ``num_videoframes`` to get the raw
            clip length in frames.
        num_videoframes: See ``skip_videoframes``.
        dist_videoframes: Frames between the starts of consecutive clips.
        video_directory: Directory scanned for files ending in ``mp4``.
            The ``None`` default cannot be joined into a path, so a value
            must be supplied.
        fps: Frame rate passed to ``VideoClips``.
    """
    # If count_videos <= 0, use all the videos. If count_clips <= 0, use
    # all the clips from all the videos.
    self.train = train
    self.transforms = transforms
    self.video_directory = video_directory
    self.skip_videoframes = skip_videoframes
    self.num_videoframes = num_videoframes
    self.dist_videoframes = dist_videoframes

    self.video_files = sorted([
        os.path.join(video_directory, f)
        for f in os.listdir(video_directory)
        if f.endswith('mp4')
    ])
    if count_videos > 0:
        self.video_files = self.video_files[:count_videos]

    clip_length_in_frames = self.num_videoframes * self.skip_videoframes
    frames_between_clips = self.dist_videoframes
    self.saved_video_clips = os.path.join(
        video_directory, 'video_clips.%dnf.%df.%ds.pkl' %
        (count_videos, clip_length_in_frames, frames_between_clips))

    # Both branches use `with` so the cache file is closed (and, on write,
    # flushed) deterministically rather than at garbage collection.
    # NOTE(review): the cache is unpickled as-is; only use files that this
    # code wrote itself.
    if os.path.exists(self.saved_video_clips):
        print('Path Exists for video_clips: ', self.saved_video_clips)
        with open(self.saved_video_clips, 'rb') as fp:
            self.video_clips = pickle.load(fp)
    else:
        print('Path does NOT exist for video_clips: ', self.saved_video_clips)
        self.video_clips = VideoClips(
            self.video_files,
            clip_length_in_frames=clip_length_in_frames,
            frames_between_clips=frames_between_clips,
            frame_rate=fps)
        with open(self.saved_video_clips, 'wb') as fp:
            pickle.dump(self.video_clips, fp)

    self.datums = self._retrieve_valid_datums(count_videos, count_clips)
    print(self.datums)
def __init__(
    self,
    video_dir,
    label_map_json,
    labels_json,
    frames_per_clip,
    step_between_clips=1,
    frame_rate=None,
    transform=None,
    _precomputed_metadata=None,
    num_workers=1,
    _video_width=0,
    _video_height=0,
    _video_min_dimension=0,
) -> "_SomethingSomethingV2Dataset":
    """Build the sample list from the two JSON files and index the videos.

    Each entry of ``labels_json`` provides an ``id`` and a ``template``.
    The template, with square brackets removed, is looked up in the label
    map to get the integer class. Videos are expected at
    ``<video_dir>/<id>.webm``.

    Args:
        video_dir: Directory that contains the ``.webm`` files.
        label_map_json: JSON file mapping label text to a class id.
        labels_json: JSON file listing the samples.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        transform: Stored on the instance; not used here.
        _precomputed_metadata, num_workers, _video_width, _video_height,
        _video_min_dimension: Forwarded to ``VideoClips`` unchanged.
    """
    for data_file in (label_map_json, labels_json):
        assert os.path.exists(data_file), f"Data file {data_file} is missing"

    with open(label_map_json, "r") as handle:
        label_map = json.load(handle)
    with open(labels_json, "r") as handle:
        entries = json.load(handle)

    self.samples = []
    for entry in entries:
        video_id = entry["id"]
        label = entry["template"].replace("[", "").replace("]", "")
        assert label in label_map, f"Unknown label: {label}"

        video_path = os.path.join(video_dir, f"{video_id}.webm")
        assert os.path.exists(video_path), f"{video_path} is missing"

        self.samples.append((video_path, int(label_map[label])))

    video_paths = [sample[0] for sample in self.samples]
    self.video_clips = VideoClips(
        video_paths,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
    )
    self.transform = transform
def __init__(self,
             root,
             index_path,
             *,
             frames_per_clip,
             step_between_clips,
             frame_rate,
             extensions=('mp4', ),
             transform=None,
             _precomputed_metadata=None,
             num_workers=1,
             _video_width=0,
             _video_height=0,
             _video_min_dimension=0,
             _audio_samples=0):
    """Index the videos listed in a JSON index file into clips.

    Classes and samples come from the index file (keys ``classes`` and
    ``samples``), not from scanning ``root``. Sample paths are joined onto
    ``root``. ``extensions`` is accepted but not used by this constructor.

    Args:
        root: Base directory for the relative sample paths.
        index_path: JSON file with ``classes`` and ``samples`` entries.
        frames_per_clip: Forwarded to ``VideoClips``.
        step_between_clips: Forwarded to ``VideoClips``.
        frame_rate: Forwarded to ``VideoClips``.
        transform: Stored on the instance; not used here.
        _precomputed_metadata, num_workers, _video_width, _video_height,
        _video_min_dimension, _audio_samples: Forwarded to ``VideoClips``.
    """
    super(Kinetics400Indexed, self).__init__(root)

    self.index_path = index_path
    with open(index_path) as handle:
        index = json.load(handle)

    self.classes = index['classes']
    self.samples = [
        (os.path.join(root, relative_path), label)
        for relative_path, label in index['samples']
    ]

    video_paths = [sample[0] for sample in self.samples]
    self.video_clips = VideoClips(
        video_paths,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )
    self.transform = transform