def create_sample_table(self):
     """Return a ``DataFrameMetadata`` fixture carrying a two-column schema.

     The schema lists an INTEGER ``id`` column followed by an NDARRAY
     ``data`` column built with ``[2, 2, 3]`` (presumably the array
     dimensions -- confirm against ``DataFrameColumn``).
     """
     sample_table = DataFrameMetadata("dataset", 'dataset')
     sample_table.schema = [
         DataFrameColumn("id", ColumnType.INTEGER, False),
         DataFrameColumn("data", ColumnType.NDARRAY, False, [2, 2, 3]),
     ]
     return sample_table
示例#2
0
    def test_df_metadata(self):
        """Check the attributes exposed by a freshly built DataFrameMetadata."""
        metadata = DataFrameMetadata('name', 'eva_dataset')
        columns = [
            DataFrameColumn("frame_id", ColumnType.INTEGER, False),
            DataFrameColumn("frame_label", ColumnType.INTEGER, False),
        ]
        expected_schema = DataFrameSchema('name', columns)
        # Assigning the raw column list must compare equal to a schema
        # built from the same name and columns.
        metadata.schema = columns

        self.assertEqual(metadata.name, 'name')
        self.assertEqual(metadata.file_url, 'eva_dataset')
        self.assertEqual(metadata.id, None)
        self.assertEqual(metadata.identifier_column, 'id')
        self.assertEqual(metadata.schema, expected_schema)
示例#3
0
 def test_should_return_batches_equivalent_to_number_of_frames(self):
     """Loading with default arguments yields NUM_FRAMES batches."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata)
     loaded_batches = list(loader.load())
     reference_frames = list(self.create_dummy_frames())
     self.assertEqual(len(loaded_batches), NUM_FRAMES)
     # Compare the first frame of every batch against the reference frames.
     first_frames = [batch.frames[0] for batch in loaded_batches]
     self.assertEqual(reference_frames, first_frames)
示例#4
0
 def test_should_call_petastorm_make_reader_with_correct_params(self, mock):
     """The patched reader factory receives the file url and shard settings."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = PetastormLoader(metadata, curr_shard=3, total_shards=3)
     # Drain the generator so the patched factory is actually invoked.
     list(loader._load_frames())
     mock.assert_called_once_with('dummy.avi', shard_count=3, cur_shard=3)
示例#5
0
    def test_should_return_the_new_path_after_execution(self, mock_class):
        """Executing a seq-scan over a storage plan returns the kept rows.

        The dummy predicate evaluates to ``[True, False, True]``, so the
        expected result is every other row of each batch, concatenated.
        """
        loader_instance = mock_class.return_value

        # Stand-in expression whose ``evaluate`` always yields the same mask.
        dummy_expr = type(
            'dummy_expr', (),
            {"evaluate": lambda x=None: [True, False, True]})

        # Build plan tree
        video = DataFrameMetadata("dataset", "dummy.avi")
        first_batch = Batch(pd.DataFrame({'data': [1, 2, 3]}))
        second_batch = Batch(pd.DataFrame({'data': [4, 5, 6]}))
        # The mocked loader hands back a one-shot iterator over both batches.
        loader_instance.load.return_value = iter([first_batch, second_batch])

        storage_plan = StoragePlan(video)
        seq_scan = SeqScanPlan(predicate=dummy_expr, column_ids=[])
        seq_scan.append_child(storage_plan)

        # Execute the plan
        actual = PlanExecutor(seq_scan).execute_plan()
        expected = first_batch[::2] + second_batch[::2]

        mock_class.assert_called_once()

        self.assertEqual(expected, actual)
示例#6
0
 def test_should_return_batches_equivalent_to_number_of_frames(self):
     """Loading with default arguments yields NUM_FRAMES batches."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata)
     loaded_batches = list(loader.load())
     reference_frames = list(self.create_dummy_frames())
     self.assertEqual(len(loaded_batches), NUM_FRAMES)
     # Take the first record of each batch's frame table for comparison.
     first_records = [
         batch.frames.to_dict('records')[0] for batch in loaded_batches
     ]
     matches = custom_list_of_dicts_equal(reference_frames, first_records)
     self.assertTrue(matches)
示例#7
0
 def test_should_skip_first_two_frames_with_offset_two(self):
     """An offset of two drops the first two frames from the output."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, offset=2)
     kept_ids = list(range(2, NUM_FRAMES))
     reference_frames = list(self.create_dummy_frames(filters=kept_ids))
     loaded_batches = list(loader.load())
     self.assertEqual(NUM_FRAMES - 2, len(loaded_batches))
     first_frames = [batch.frames[0] for batch in loaded_batches]
     self.assertEqual(reference_frames, first_frames)
示例#8
0
 def test_should_return_half_then_number_of_batches_with_skip_of_two(self):
     """With ``skip_frames=2`` only every second frame is returned."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, skip_frames=2)
     loaded_batches = list(loader.load())
     # Even frame indices 0, 2, 4, ... are the ones expected to survive.
     even_ids = [idx * 2 for idx in range(NUM_FRAMES // 2)]
     reference_frames = list(self.create_dummy_frames(filters=even_ids))
     self.assertEqual(len(loaded_batches), NUM_FRAMES / 2)
     first_frames = [batch.frames[0] for batch in loaded_batches]
     self.assertEqual(reference_frames, first_frames)
示例#9
0
 def test_should_return_single_batch_if_batch_size_equal_to_no_of_frames(
         self):
     """A batch size of NUM_FRAMES packs every frame into one batch."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, batch_size=NUM_FRAMES)
     all_ids = list(range(NUM_FRAMES))
     reference_frames = list(self.create_dummy_frames(filters=all_ids))
     loaded_batches = list(loader.load())
     self.assertEqual(1, len(loaded_batches))
     only_batch = loaded_batches[0]
     self.assertEqual(reference_frames, list(only_batch.frames))
示例#10
0
 def test_should_return_only_few_frames_when_limit_is_specified(self):
     """A ``limit`` caps the number of batches the loader produces."""
     limit = 4
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, limit=limit)
     limited_ids = list(range(limit))
     reference_frames = list(self.create_dummy_frames(filters=limited_ids))
     loaded_batches = list(loader.load())
     self.assertEqual(limit, len(loaded_batches))
     first_frames = [batch.frames[0] for batch in loaded_batches]
     self.assertEqual(reference_frames, first_frames)
示例#11
0
 def test_should_return_only_few_frames_when_limit_is_specified(self):
     """A ``limit`` caps the number of batches the loader produces."""
     limit = 4
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, limit=limit)
     limited_ids = list(range(limit))
     reference_frames = list(self.create_dummy_frames(filters=limited_ids))
     loaded_batches = list(loader.load())
     self.assertEqual(limit, len(loaded_batches))
     # Take the first record of each batch's frame table for comparison.
     first_records = [
         batch.frames.to_dict('records')[0] for batch in loaded_batches
     ]
     matches = custom_list_of_dicts_equal(reference_frames, first_records)
     self.assertTrue(matches)
示例#12
0
 def test_should_skip_first_two_frames_with_offset_two(self):
     """An offset of two drops the first two frames from the output."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, offset=2)
     kept_ids = list(range(2, NUM_FRAMES))
     reference_frames = list(self.create_dummy_frames(filters=kept_ids))
     loaded_batches = list(loader.load())
     self.assertEqual(NUM_FRAMES - 2, len(loaded_batches))
     # Take the first record of each batch's frame table for comparison.
     first_records = [
         batch.frames.to_dict('records')[0] for batch in loaded_batches
     ]
     matches = custom_list_of_dicts_equal(reference_frames, first_records)
     self.assertTrue(matches)
示例#13
0
 def test_should_return_half_then_number_of_batches_with_skip_of_two(self):
     """With ``skip_frames=2`` only every second frame is returned."""
     metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
     loader = VideoLoader(metadata, skip_frames=2)
     loaded_batches = list(loader.load())
     # Even frame indices 0, 2, 4, ... are the ones expected to survive.
     even_ids = [idx * 2 for idx in range(NUM_FRAMES // 2)]
     reference_frames = list(self.create_dummy_frames(filters=even_ids))
     self.assertEqual(len(loaded_batches), NUM_FRAMES / 2)
     first_records = [
         batch.frames.to_dict('records')[0] for batch in loaded_batches
     ]
     matches = custom_list_of_dicts_equal(reference_frames, first_records)
     self.assertTrue(matches)
示例#14
0
    def test_df_metadata_equality(self):
        """Metadata equality must depend on both the name and the schema."""
        base = DataFrameMetadata('name', 'eva_dataset')
        base_columns = [
            DataFrameColumn("frame_id", ColumnType.INTEGER, False),
            DataFrameColumn("frame_label", ColumnType.INTEGER, False),
        ]
        base.schema = base_columns
        # An object always equals itself.
        self.assertEqual(base, base)

        # Same columns, different name -> not equal.
        renamed = DataFrameMetadata('name2', 'eva_dataset')
        renamed_columns = [
            DataFrameColumn("frame_id", ColumnType.INTEGER, False),
            DataFrameColumn("frame_label", ColumnType.INTEGER, False),
        ]
        renamed.schema = renamed_columns
        self.assertNotEqual(base, renamed)

        # Same name, schema missing the first column -> not equal.
        truncated = DataFrameMetadata('name2', 'eva_dataset')
        truncated.schema = renamed_columns[1:]
        self.assertNotEqual(renamed, truncated)
示例#15
0
    def test_load_frame_load_frames_using_petastorm(self, mock):
        """Frames produced by the loader match the rows the reader yields.

        :param mock: patched reader factory; its return value is replaced
        with a ``DummyReader`` over three dummy rows.
        """
        # Materialize the rows in a list. A bare ``map`` object is a one-shot
        # iterator: once the reader has consumed it there is nothing left to
        # build ``expected`` from, which would empty out the comparison.
        dummy_values = [
            self.DummyRow(i, np.ones((2, 2, 3)) * i) for i in range(3)
        ]
        # The reader still receives an iterator, as before, but an
        # independent one, so ``dummy_values`` can be walked again below.
        mock.return_value = self.DummyReader(iter(dummy_values))

        video_info = DataFrameMetadata("dataset_1", 'dummy.avi')

        video_loader = PetastormLoader(video_info,
                                       curr_shard=3,
                                       total_shards=3)
        actual = list(video_loader._load_frames())
        expected = [value._asdict() for value in dummy_values]

        self.assertTrue(custom_list_of_dicts_equal(expected, actual))
示例#16
0
    def get_table_bindings(self, database_name: str, table_name: str,
                           column_names: List[str]) -> Tuple[int, List[int]]:
        """
        Resolve a table name (and optionally column names) to catalog ids.
        :param database_name: currently not in use
        :param table_name: name of the table being referred to
        :param column_names: names of the table's columns whose ids are
        required; when None no column lookup is performed
        :return: the metadata id of the table and a list of column ids
        (an empty list when column_names is None)
        """
        table_id = DataFrameMetadata.get_id_from_name(table_name)
        if column_names is None:
            return table_id, []

        resolved_ids = DataFrameColumn.get_id_from_metadata_id_and_name_in(
            table_id, column_names)
        return table_id, resolved_ids
示例#17
0
 def get_metadata(self,
                  metadata_id: int,
                  col_id_list: List[int] = None) -> DataFrameMetadata:
     """
     Fetch the metadata object for a table by its metadata id.
     When column ids are supplied, the matching columns are looked up and
     installed on the metadata object as its schema before it is returned.
     :param metadata_id: metadata id of the table
     :param col_id_list: optional column ids of the table referred
     :return: the metadata object for metadata_id
     """
     table_metadata = DataFrameMetadata.get(metadata_id)
     if col_id_list is None:
         return table_metadata

     selected_columns = DataFrameColumn.get_by_metadata_id_and_id_in(
         col_id_list, metadata_id)
     restricted_schema = DataFrameSchema(table_metadata.get_name(),
                                         selected_columns)
     table_metadata.set_schema(restricted_schema)
     return table_metadata
示例#18
0
    def test_load_frame_load_frames_using_petastorm(self, mock):
        """Rows served by the patched reader come back as ``Frame`` objects."""
        # Lazily produced dummy rows; the reader is given a one-shot iterator.
        reader_rows = (self.DummyRow(idx, np.ones((2, 2, 3)) * idx)
                       for idx in range(3))
        mock.return_value = self.DummyReader(reader_rows)

        metadata = DataFrameMetadata("dataset_1", 'dummy.avi')
        loader = PetastormLoader(metadata, curr_shard=3, total_shards=3)

        actual = list(loader._load_frames())

        expected = []
        for idx in range(3):
            pixels = np.ones((2, 2, 3)) * idx
            info = FrameInfo(2, 2, 3, ColorSpace.BGR)
            expected.append(Frame(idx, pixels, info))

        self.assertEqual(expected, actual)
    def test_calling_storage_executor_should_return_batches(self, mock_class):
        """The executor builds the loader from the plan and relays its output.

        :param mock_class: patched loader class; its instance's ``load`` is
        stubbed to return ``range(5)``.
        """
        loader_mock = mock_class.return_value

        metadata = DataFrameMetadata('dataset', 'dummy.avi')
        plan = StoragePlan(metadata)

        executor = DiskStorageExecutor(plan)

        loader_mock.load.return_value = range(5)
        produced = list(executor.exec())

        # The loader must be constructed exactly once with the plan's settings.
        expected_kwargs = {
            'batch_size': plan.batch_size,
            'limit': plan.limit,
            'offset': plan.offset,
            'skip_frames': plan.skip_frames,
            'total_shards': 0,
            'curr_shard': 0,
        }
        mock_class.assert_called_once_with(metadata, **expected_kwargs)
        loader_mock.load.assert_called_once()
        self.assertEqual(list(range(5)), produced)