Exemplo n.º 1
0
 def test_load_dataclass_dict_from_csv_throws(self):
     """Expect ``AssertionError`` for repeated keys with ``list_per_key=False``.

     The saved rows repeat the values ``"a"`` and ``"c"`` in field ``a``, which
     is the field used as the dictionary key when loading.
     """
     records = [
         TestDataclass2("a", 1),
         TestDataclass2("a", 2),
         TestDataclass2("b", 3),
         TestDataclass2("c", 4),
         TestDataclass2("c", 4),
         TestDataclass2("c", 4),
     ]
     # Use the bare class name as the prefix: formatting the class object
     # itself yields "<class '...'>", and "<" / ">" are not valid characters
     # in Windows file names.
     with tempfile.TemporaryDirectory(
             prefix=TestDataUtils.__name__) as tempdir:
         csv_path = Path(tempdir) / "data.csv"
         save_dataclass_objs_to_headered_csv(records, csv_path)
         with self.assertRaises(AssertionError):
             load_dataclass_dict_from_csv(
                 csv_path, TestDataclass2, "a", list_per_key=False)
Exemplo n.º 2
0
    def test_load_dataclass_dict_from_csv_value_dict(self):
        """Round-trip rows with unique keys and load one object per key.

        Four rows with distinct values in field ``a`` are saved, then loaded
        with ``list_per_key=False``; the result must hold four entries and the
        entry for ``"c"`` must carry ``b == 3``.
        """
        records = [
            TestDataclass2("a", 1),
            TestDataclass2("b", 2),
            TestDataclass2("c", 3),
            TestDataclass2("d", 4),
        ]
        # Use the bare class name as the prefix: formatting the class object
        # itself yields "<class '...'>", and "<" / ">" are not valid
        # characters in Windows file names.
        with tempfile.TemporaryDirectory(
                prefix=TestDataUtils.__name__) as tempdir:
            csv_path = Path(tempdir) / "data.csv"
            save_dataclass_objs_to_headered_csv(records, csv_path)

            loaded = load_dataclass_dict_from_csv(
                csv_path, TestDataclass2, "a", list_per_key=False)
            self.assertEqual(len(loaded), 4)
            self.assertEqual(loaded["c"].b, 3)
Exemplo n.º 3
0
    def test_save_dataclass_objs_to_headered_csv(self):
        """Check the exact text written for three dataclass rows.

        The file must contain a header line naming the fields followed by one
        comma-separated line per object, in the order the objects were given.
        """
        records = [
            TestDataclass2("a", 1),
            TestDataclass2("a", 2),
            TestDataclass2("b", 3),
        ]
        expected_lines = ["a,b\n", "a,1\n", "a,2\n", "b,3\n"]

        # Use the bare class name as the prefix: formatting the class object
        # itself yields "<class '...'>", and "<" / ">" are not valid
        # characters in Windows file names.
        with tempfile.TemporaryDirectory(
                prefix=TestDataUtils.__name__) as tempdir:
            csv_path = Path(tempdir) / "data.csv"
            save_dataclass_objs_to_headered_csv(records, csv_path)
            with open(csv_path) as f:
                lines = f.readlines()

            # Length first, then line by line, so a failure pinpoints the
            # first offending line.
            self.assertEqual(len(lines), len(expected_lines))
            for actual, expected in zip(lines, expected_lines):
                self.assertEqual(actual, expected)
def save_video_frame_info(video_frames: Dict[str, VideoFrameInfo],
                          file_name: str = None) -> str:
    """
    Write the values of a video frame dictionary to a headered csv file.

    Args:
        video_frames (Dict[str, VideoFrameInfo]):
            Dictionary whose values are the VideoFrameInfo records to save;
            the keys are not written.

        file_name (str):
            Path of the csv file to write. When None, the file
            ``video_frame_metadata.csv`` in the current working directory
            is used.

    Returns:
        The path the records were written to.
    """
    if file_name is None:
        # No destination given: fall back to a fixed name in the cwd.
        file_name = f"{os.getcwd()}/video_frame_metadata.csv"

    frame_infos = list(video_frames.values())
    save_dataclass_objs_to_headered_csv(frame_infos, file_name)
    return file_name
Exemplo n.º 5
0
 def test_load_dataclass_dict_from_csv_list_dict(self):
     """Round-trip rows with repeated keys and load a list per key.

     Six rows sharing three distinct values in field ``a`` are saved, then
     loaded with ``list_per_key=True``; each key must map to the ``b`` values
     of its rows in the order they were saved.
     """
     records = [
         TestDataclass2("a", 1),
         TestDataclass2("a", 2),
         TestDataclass2("b", 3),
         TestDataclass2("c", 4),
         TestDataclass2("c", 4),
         TestDataclass2("c", 4),
     ]
     # Use the bare class name as the prefix: formatting the class object
     # itself yields "<class '...'>", and "<" / ">" are not valid characters
     # in Windows file names.
     with tempfile.TemporaryDirectory(
             prefix=TestDataUtils.__name__) as tempdir:
         csv_path = Path(tempdir) / "data.csv"
         save_dataclass_objs_to_headered_csv(records, csv_path)
         loaded = load_dataclass_dict_from_csv(
             csv_path, TestDataclass2, "a", list_per_key=True)
         self.assertEqual(len(loaded), 3)
         self.assertEqual([x.b for x in loaded["a"]], [1, 2])
         self.assertEqual([x.b for x in loaded["b"]], [3])
         self.assertEqual([x.b for x in loaded["c"]], [4, 4, 4])
Exemplo n.º 6
0
    def test__len__(self, dataset_type):
        """Check the video, activity and clip counts of a ``DomsevDataset``.

        Video info, activities and a video data manifest are written to a
        temporary directory, a dataset is built from those files with a clip
        sampler that always returns seven clips, and the resulting sizes are
        asserted.

        Args:
            dataset_type: ``VideoDatasetType.Frame`` or
                ``VideoDatasetType.EncodedVideo``; selects how the mock video
                data is generated.
        """
        # Use the bare class name as the prefix: formatting the class object
        # itself yields "<class '...'>", and "<" / ">" are not valid
        # characters in Windows file names.
        with tempfile.TemporaryDirectory(
            prefix=TestDomsevDataset.__name__
        ) as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file
            )
            activity_file = tempdir / "activity_video_info.csv"
            # Flatten the per-video activity lists into one list of rows.
            activities = [
                activity
                for activity_list in self.ACTIVITIES_DATA.values()
                for activity in activity_list
            ]
            save_dataclass_objs_to_headered_csv(activities, activity_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json"
            )
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)
                else:
                    # Fail clearly instead of hitting an unbound variable.
                    self.fail(f"Unsupported dataset_type: {dataset_type}")

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()), video_data_manifest_file_path
                )
                video_ids = list(self.ACTIVITIES_DATA)
                dataset = DomsevDataset(
                    video_data_manifest_file_path=str(video_data_manifest_file_path),
                    video_info_file_path=str(video_info_file),
                    activities_file_path=str(activity_file),
                    dataset_type=dataset_type,
                    # Seven clips, two consecutive clips per video id.
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                )

                self.assertEqual(len(dataset._videos), 4)
                total_activities = [
                    activity
                    for video_activities in dataset._activities.values()
                    for activity in video_activities
                ]
                self.assertEqual(len(total_activities), 6)
                self.assertEqual(len(dataset), 7)  # Num clips
Exemplo n.º 7
0
    def test__len__(self, dataset_type):
        """Check that an ``EpicKitchenDataset`` reports one item per clip.

        Video info, actions and a video data manifest are written to a
        temporary directory, a dataset is built from those files with a clip
        sampler that always returns seven clips, and ``len(dataset)`` is
        asserted to be seven.

        Args:
            dataset_type: ``VideoDatasetType.Frame`` or
                ``VideoDatasetType.EncodedVideo``; selects how the mock video
                data is generated.
        """
        # Use the bare class name as the prefix: formatting the class object
        # itself yields "<class '...'>", and "<" / ">" are not valid
        # characters in Windows file names.
        with tempfile.TemporaryDirectory(
                prefix=TestEpicKitchenDataset.__name__) as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file)
            action_file = tempdir / "action_video_info.csv"
            # Flatten the per-video action lists into one list of rows.
            actions = [
                action
                for action_list in self.ACTIONS_DATAS.values()
                for action in action_list
            ]
            save_dataclass_objs_to_headered_csv(actions, action_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json")
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)
                else:
                    # Fail clearly instead of hitting an unbound variable.
                    self.fail(f"Unsupported dataset_type: {dataset_type}")

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()),
                    video_data_manifest_file_path)

                dataset = EpicKitchenDataset(
                    video_info_file_path=str(video_info_file),
                    actions_file_path=str(action_file),
                    # Seven clips, one per stringified index.
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(str(i), i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                    video_data_manifest_file_path=str(
                        video_data_manifest_file_path),
                    dataset_type=dataset_type,
                )

                self.assertEqual(len(dataset), 7)
Exemplo n.º 8
0
    def test__getitem__(self, dataset_type):
        """Check the clips returned by ``DomsevDataset.__getitem__``.

        Video info, activities and a video data manifest are written to a
        temporary directory and a dataset is built from those files with a
        clip sampler that always returns seven clips. With ``get_clip``
        patched to return random video data, the items at index 1 and 2 are
        checked for their activities, time span and video id.

        Args:
            dataset_type: ``VideoDatasetType.Frame`` or
                ``VideoDatasetType.EncodedVideo``; selects how the mock video
                data is generated and which ``get_clip`` is patched.
        """
        # Use the bare class name as the prefix: formatting the class object
        # itself yields "<class '...'>", and "<" / ">" are not valid
        # characters in Windows file names.
        with tempfile.TemporaryDirectory(
            prefix=TestDomsevDataset.__name__
        ) as tempdir:
            tempdir = Path(tempdir)

            video_info_file = tempdir / "test_video_info.csv"
            save_dataclass_objs_to_headered_csv(
                list(MOCK_VIDEO_INFOS.values()), video_info_file
            )
            activity_file = tempdir / "activity_video_info.csv"
            # Flatten the per-video activity lists into one list of rows.
            activities = [
                activity
                for activity_list in self.ACTIVITIES_DATA.values()
                for activity in activity_list
            ]
            save_dataclass_objs_to_headered_csv(activities, activity_file)

            video_data_manifest_file_path = (
                tempdir / "video_data_manifest_file_path.json"
            )
            with ExitStack() as stack:
                if dataset_type == VideoDatasetType.Frame:
                    video_data_dict = get_flat_video_frames(tempdir, "jpg")
                elif dataset_type == VideoDatasetType.EncodedVideo:
                    video_data_dict = get_encoded_video_infos(tempdir, stack)
                else:
                    # Fail clearly instead of hitting an unbound variable.
                    self.fail(f"Unsupported dataset_type: {dataset_type}")

                save_dataclass_objs_to_headered_csv(
                    list(video_data_dict.values()), video_data_manifest_file_path
                )
                video_ids = list(self.ACTIVITIES_DATA)
                dataset = DomsevDataset(
                    video_data_manifest_file_path=str(video_data_manifest_file_path),
                    video_info_file_path=str(video_info_file),
                    activities_file_path=str(activity_file),
                    dataset_type=dataset_type,
                    # Seven clips, two consecutive clips per video id.
                    clip_sampler=lambda x, y: [
                        VideoClipInfo(video_ids[i // 2], i * 2.0, i * 2.0 + 0.9)
                        for i in range(0, 7)
                    ],
                )

                get_clip_string = (
                    "pytorchvideo.data.frame_video.FrameVideo.get_clip"
                    if dataset_type == VideoDatasetType.Frame
                    else "pytorchvideo.data.encoded_video.EncodedVideo.get_clip"
                )
                # (clip index, index into video_ids / MOCK_VIDEO_IDS,
                #  expected start time, expected stop time)
                expected_clips = (
                    (1, 0, 2.0, 2.9),
                    (2, 1, 4.0, 4.9),
                )
                with unittest.mock.patch(
                    get_clip_string,
                    return_value=({"video": torch.rand(3, 5, 10, 20), "audio": []}),
                ):
                    for clip_index, video_index, start, stop in expected_clips:
                        clip = dataset.__getitem__(clip_index)
                        expected_activities = self.ACTIVITIES_DATA[
                            video_ids[video_index]
                        ]
                        for i, a in enumerate(clip["activities"]):
                            self.assertEqual(a, expected_activities[i])
                        self.assertEqual(clip["start_time"], start)
                        self.assertEqual(clip["stop_time"], stop)
                        self.assertEqual(
                            clip["video_id"], MOCK_VIDEO_IDS[video_index]
                        )