Example #1
def define_clip_structure(
    videos: Dict[str, Video], video_actions: Dict[str, List[ActionData]]
) -> List[VideoClipInfo]:
    # num_forecast_actions and time_window_length are free variables,
    # captured from the enclosing scope in which this sampler is defined.
    candidate_sample_clips = []
    for video_id, actions in video_actions.items():
        # Keep only actions that still have num_forecast_actions actions
        # after them to predict.
        for i, action in enumerate(actions[: (-1 * num_forecast_actions)]):
            # Confirm there are >= num_forecast_actions actions starting
            # strictly after this one (actions may overlap, so the slice
            # above is not sufficient on its own).
            number_valid_actions = 0
            for j in range(i + 1, len(actions)):
                if actions[j].start_time > action.stop_time:
                    number_valid_actions += 1
                if number_valid_actions == num_forecast_actions:
                    # Only add clips whose full input window fits inside
                    # the video.
                    if action.start_time - time_window_length >= 0:
                        candidate_sample_clips.append(
                            VideoClipInfo(
                                video_id,
                                action.stop_time - time_window_length,
                                action.stop_time,
                            )
                        )
                    break
    return candidate_sample_clips
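
This sampler comes from an action-forecasting setup: each clip ends where an action ends, and the model sees the preceding time_window_length seconds. Below is a minimal, self-contained sketch of how it could be wired up; the factory name make_forecast_clip_sampler and the stand-in dataclasses are illustrative assumptions, not pytorchvideo's actual API, but the closure mirrors how the sampler above reads its two free variables.

from dataclasses import dataclass
from typing import List

@dataclass
class VideoClipInfo:  # simplified stand-in for the record the sampler returns
    video_id: str
    start_time: float
    stop_time: float

@dataclass
class ActionData:  # stand-in exposing only the fields the sampler reads
    start_time: float
    stop_time: float

def make_forecast_clip_sampler(time_window_length: float, num_forecast_actions: int):
    # The inner function closes over both parameters, mirroring how the
    # sampler above references them as free variables.
    def define_clip_structure(videos, video_actions) -> List[VideoClipInfo]:
        clips = []
        for video_id, actions in video_actions.items():
            for i, action in enumerate(actions[:-num_forecast_actions]):
                valid = 0
                for j in range(i + 1, len(actions)):
                    if actions[j].start_time > action.stop_time:
                        valid += 1
                    if valid == num_forecast_actions:
                        if action.start_time - time_window_length >= 0:
                            clips.append(VideoClipInfo(
                                video_id,
                                action.stop_time - time_window_length,
                                action.stop_time,
                            ))
                        break
        return clips
    return define_clip_structure

sampler = make_forecast_clip_sampler(time_window_length=2.0, num_forecast_actions=1)
clips = sampler({}, {"vid_0": [ActionData(3.0, 4.0), ActionData(5.0, 6.0)]})
print(clips)  # [VideoClipInfo(video_id='vid_0', start_time=2.0, stop_time=4.0)]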
Example #2
def define_clip_structure(
    videos: Dict[str, Video], actions: Dict[str, List[ActionData]]
) -> List[VideoClipInfo]:
    # seconds_per_clip is a free variable captured from the enclosing scope.
    clips = []
    for video_id, video in videos.items():
        # One random phase offset per video, so clip boundaries do not
        # always fall at multiples of seconds_per_clip.
        offset = random.random() * seconds_per_clip
        num_clips = int((video.duration - offset) // seconds_per_clip)

        for i in range(num_clips):
            start_time = i * seconds_per_clip + offset
            stop_time = start_time + seconds_per_clip
            clip = VideoClipInfo(video_id, start_time, stop_time)
            clips.append(clip)
    return clips
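
This variant tiles each video with back-to-back fixed-length clips, randomly phase-shifted per video. A runnable sketch under the same closure assumption (VideoClipInfo and Video here are simplified stand-ins; the factory name is hypothetical), with a seeded RNG so the output is reproducible:

import random
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class VideoClipInfo:  # simplified stand-in, as above
    video_id: str
    start_time: float
    stop_time: float

@dataclass
class Video:  # hypothetical stand-in exposing only .duration
    duration: float

def make_uniform_clip_sampler(seconds_per_clip: float):
    # seconds_per_clip is closed over, mirroring the free variable above.
    def define_clip_structure(videos: Dict[str, Video], actions) -> List[VideoClipInfo]:
        clips = []
        for video_id, video in videos.items():
            # One random phase offset per video.
            offset = random.random() * seconds_per_clip
            num_clips = int((video.duration - offset) // seconds_per_clip)
            for i in range(num_clips):
                start_time = i * seconds_per_clip + offset
                clips.append(VideoClipInfo(video_id, start_time, start_time + seconds_per_clip))
        return clips
    return define_clip_structure

random.seed(0)  # fix the offset for a reproducible demo
sampler = make_uniform_clip_sampler(seconds_per_clip=2.0)
clips = sampler({"vid_0": Video(duration=10.0)}, {})
print(len(clips))  # 4 -> four 2 s clips; a fifth would run past the 10 s duration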
Example #3
    def test__len__(self, dataset_type):
        with tempfile.TemporaryDirectory(prefix=f"{TestDomsevDataset}") as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file
            )
            activity_file = tempdir / "activity_video_info.csv"
            activities = []
            for activity_list in self.ACTIVITIES_DATA.values():
                for activity in activity_list:
                    activities.append(activity)
            save_dataclass_objs_to_headered_csv(activities, activity_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json"
            )
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()), video_data_manifest_file_path
                )
                video_ids = list(self.ACTIVITIES_DATA)
                dataset = DomsevDataset(
                    video_data_manifest_file_path=str(video_data_manifest_file_path),
                    video_info_file_path=str(video_info_file),
                    activities_file_path=str(activity_file),
                    dataset_type=dataset_type,
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                )

                self.assertEqual(len(dataset._videos), 4)
                total_activities = [
                    activity
                    for video_activities in list(dataset._activities.values())
                    for activity in video_activities
                ]
                self.assertEqual(len(total_activities), 6)
                self.assertEqual(len(dataset), 7)  # Num clips
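
The three assertions follow directly from the mock data and the fixed clip_sampler: four videos in MOCK_VIDEO_INFOS, six activities across ACTIVITIES_DATA, and a lambda that always returns seven clips, mapping pairs of consecutive clip indices onto the same video via i // 2. A small check of that layout (the video ids below are hypothetical placeholders for the four mock ids):

video_ids = ["v0", "v1", "v2", "v3"]  # placeholders for the four mock ids
layout = [(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9) for i in range(7)]
print(len(layout))  # 7 -> matches len(dataset)
print(layout[:3])   # [('v0', 0.0, 0.9), ('v0', 2.0, 2.9), ('v1', 4.0, 4.9)]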
Example #4
    def test__len__(self, dataset_type):
        with tempfile.TemporaryDirectory(
                prefix=f"{TestEpicKitchenDataset}") as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file)
            action_file = tempdir / "action_video_info.csv"
            actions = []
            for action_list in self.ACTIONS_DATAS.values():
                for action in action_list:
                    actions.append(action)
            save_dataclass_objs_to_headered_csv(actions, action_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json")
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()),
                    video_data_manifest_file_path)

                dataset = EpicKitchenDataset(
                    video_info_file_path=str(video_info_file),
                    actions_file_path=str(action_file),
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(str(i), i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                    video_data_manifest_file_path=str(
                        video_data_manifest_file_path),
                    dataset_type=dataset_type,
                )

                self.assertEqual(len(dataset), 7)
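
This is the same pattern exercised against EpicKitchenDataset; the only difference in the sampler is that each clip gets its own synthetic video id (str(i)) rather than pairing clips per video. The contract the tests rely on is simply a two-argument callable returning the clip list, so len(dataset) equals the length of that list. A sketch of that contract (the ClipSampler alias and the stand-in dataclass are assumptions for illustration):

from dataclasses import dataclass
from typing import Any, Callable, List

@dataclass
class VideoClipInfo:  # simplified stand-in, as in the earlier sketches
    video_id: str
    start_time: float
    stop_time: float

# Hypothetical alias: the tests pass callables of exactly this shape.
ClipSampler = Callable[[Any, Any], List[VideoClipInfo]]

seven_clips: ClipSampler = lambda videos, actions: [
    VideoClipInfo(str(i), i * 2.0, i * 2.0 + 0.9) for i in range(7)
]
assert len(seven_clips(None, None)) == 7  # mirrors len(dataset) == 7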
Example #5
    def test__getitem__(self, dataset_type):
        with tempfile.TemporaryDirectory(prefix=f"{TestDomsevDataset}") as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file
            )
            activity_file = tempdir / "activity_video_info.csv"
            activities = []
            for activity_list in self.ACTIVITIES_DATA.values():
                for activity in activity_list:
                    activities.append(activity)
            save_dataclass_objs_to_headered_csv(activities, activity_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json"
            )
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()), video_data_manifest_file_path
                )
                video_ids = list(self.ACTIVITIES_DATA)
                dataset = DomsevDataset(
                    video_data_manifest_file_path=str(video_data_manifest_file_path),
                    video_info_file_path=str(video_info_file),
                    activities_file_path=str(activity_file),
                    dataset_type=dataset_type,
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                )

                get_clip_string = (
                    "pytorchvideo.data.frame_video.FrameVideo.get_clip"
                    if dataset_type == VideoDatasetType.Frame
                    else "pytorchvideo.data.encoded_video.EncodedVideo.get_clip"
                )
                with unittest.mock.patch(
                    get_clip_string,
                    return_value=({"video": torch.rand(3, 5, 10, 20), "audio": []}),
                ) as _:
                    clip_1 = dataset.__getitem__(1)
                    for i, a in enumerate(clip_1["activities"]):
                        self.assertEqual(a, self.ACTIVITIES_DATA[video_ids[0]][i])
                    self.assertEqual(clip_1["start_time"], 2.0)
                    self.assertEqual(clip_1["stop_time"], 2.9)
                    self.assertEqual(clip_1["video_id"], MOCK_VIDEO_IDS[0])

                    clip_2 = dataset.__getitem__(2)
                    for i, a in enumerate(clip_2["activities"]):
                        self.assertEqual(a, self.ACTIVITIES_DATA[video_ids[1]][i])
                    self.assertEqual(clip_2["start_time"], 4.0)
                    self.assertEqual(clip_2["stop_time"], 4.9)
                    self.assertEqual(clip_2["video_id"], MOCK_VIDEO_IDS[1])
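;
The expected values in these assertions can be read straight off the clip_sampler lambda: dataset index i maps to video_ids[i // 2] and the span (i * 2.0, i * 2.0 + 0.9). A quick worked check for the two indices exercised above:

for i in (1, 2):
    print(i // 2, i * 2.0, i * 2.0 + 0.9)
# 0 2.0 2.9  -> clip_1: video_ids[0], start 2.0, stop 2.9
# 1 4.0 4.9  -> clip_2: video_ids[1], start 4.0, stop 4.9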