Exemplo n.º 1
0
def bind_tuple_value_expr(expr: 'AbstractExpression'):
    """Resolve catalog ids for a tuple-value expression.

    Looks up the table/column bindings for ``expr`` in the catalog and
    stores the resulting metadata ids on the expression object.
    """
    manager = CatalogManager()
    metadata_id, col_id_list = manager.get_table_bindings(
        None, expr.table_name, expr.col_name)
    expr.table_metadata_id = metadata_id
    expr.col_metadata_id = col_id_list.pop()
Exemplo n.º 2
0
def create_video_metadata(name: str) -> DataFrameMetadata:
    """Build and persist catalog metadata for a video table.

    Every video table gets the same predefined schema:
        id:   unique integer frame id
        data: the raw frame as a uint8 ndarray

    Arguments:
        name (str): name of the metadata to be added to the catalog

    Returns:
        DataFrameMetadata:  corresponding metadata for the input table info
    """
    catalog = CatalogManager()
    # NOTE: the ndarray dimensions are set as None. We need to fix this as
    # we cannot assume. Either ask the user to provide this with load or
    # we infer this from the provided video.
    column_defs = [
        ColumnDefinition('id', ColumnType.INTEGER, None,
                         [], ColConstraintInfo(unique=True)),
        ColumnDefinition('data', ColumnType.NDARRAY, NdArrayType.UINT8,
                         [None, None, None]),
    ]
    col_objects = create_column_metadata(column_defs)
    file_url = str(generate_file_path(name))
    return catalog.create_metadata(
        name, file_url, col_objects, identifier_column='id')
Exemplo n.º 3
0
 def test_delete_metadata(self, dcs_mock, ds_mock, initdb_mock):
     # Deleting by name should resolve the dataset id first, then delete
     # the dataset row carrying that id.
     name = "name"
     CatalogManager().delete_metadata(name)
     lookup = ds_mock.return_value.dataset_by_name
     lookup.assert_called_with(name)
     ds_mock.return_value.delete_dataset_by_id.assert_called_with(
         lookup.return_value)
Exemplo n.º 4
0
 def test_create_udf_io_object(self, udfio_mock):
     # udf_io should forward its arguments to the UdfIO constructor and
     # return the constructed object.
     result = CatalogManager().udf_io('name', ColumnType.TEXT, [100], True)
     udfio_mock.assert_called_with(
         'name', ColumnType.TEXT, array_dimensions=[100], is_input=True)
     self.assertEqual(result, udfio_mock.return_value)
Exemplo n.º 5
0
 def test_catalog_manager_reset(self, mock_bootstrap, mock_shutdown):
     # reset() must re-run __init__, bootstrap, and shutdown exactly once.
     manager = CatalogManager()
     init_spy = MagicMock()
     with mock.patch.object(CatalogManager, '__init__', init_spy):
         manager.reset()
         init_spy.assert_called_once_with()
         mock_bootstrap.assert_called_once_with()
         mock_shutdown.assert_called_once_with()
Exemplo n.º 6
0
 def test_create_udf(self, udfio_mock, udf_mock):
     # create_udf should attach the io objects to the udf service entry and
     # return whatever the udf service created.
     io_objects = [MagicMock()]
     result = CatalogManager().create_udf(
         'udf', 'sample.py', 'classification', io_objects)
     udfio_mock.return_value.add_udf_io.assert_called_with(io_objects)
     udf_mock.return_value.create_udf.assert_called_with(
         'udf', 'sample.py', 'classification')
     self.assertEqual(result, udf_mock.return_value.create_udf.return_value)
Exemplo n.º 7
0
 def test_create_udf_io_object(self, udfio_mock):
     # NDARRAY io columns carry an array_type in addition to dimensions.
     result = CatalogManager().udf_io(
         'name', ColumnType.NDARRAY, NdArrayType.UINT8, [2, 3, 4], True)
     udfio_mock.assert_called_with('name',
                                   ColumnType.NDARRAY,
                                   array_type=NdArrayType.UINT8,
                                   array_dimensions=[2, 3, 4],
                                   is_input=True)
     self.assertEqual(result, udfio_mock.return_value)
Exemplo n.º 8
0
def bind_function_expr(expr: FunctionExpression, column_mapping):
    """Attach the catalog-backed UDF implementation to ``expr``.

    Resolves the UDF entry by name, binds the selected output object
    (logging an error when the requested output does not exist), and
    instantiates the implementation class from its source file.
    """
    catalog = CatalogManager()
    udf_entry = catalog.get_udf_by_name(expr.name)
    if expr.output:
        expr.output_obj = catalog.get_udf_io_by_name(expr.output)
        if expr.output_obj is None:
            message = 'Invalid output {} selected for UDF {}'.format(
                expr.output, expr.name)
            LoggingManager().log(message, LoggingLevel().ERROR)
    expr.function = path_to_class(udf_entry.impl_file_path, udf_entry.name)()
Exemplo n.º 9
0
def bind_dataset(video_info: TableInfo) -> DataFrameMetadata:
    """Fetch the catalog metadata matching a parsed table reference.

    Arguments:
         video_info (TableInfo): video information obtained in SQL query

    Returns:
        DataFrameMetadata  -  corresponding metadata for the input table info
    """
    return CatalogManager().get_dataset_metadata(
        video_info.database_name, video_info.table_name)
Exemplo n.º 10
0
    def test_table_binding_without_columns_returns_no_column_ids(
            self, dcs_mock, ds_mock, initdb_mock):
        # get_dataset_metadata should delegate the lookup to the dataset
        # service and hand back its result untouched.
        manager = CatalogManager()
        name, db = "name", "database"

        result = manager.get_dataset_metadata(db, name)

        lookup = ds_mock.return_value.dataset_object_by_name
        lookup.assert_called_with(db, name)
        self.assertEqual(result, lookup.return_value)
Exemplo n.º 11
0
def _old_bind_tuple_value_expr(expr):
    """Bind table/column metadata ids onto ``expr`` (legacy interface).

    NOTE: No tests for this; should be combined with the latest interface.
    """
    catalog = CatalogManager()
    table_id, column_ids = catalog.get_table_bindings(None, expr.table_name,
                                                      [expr.col_name])
    expr.table_metadata_id = table_id
    if not isinstance(column_ids, list) or len(column_ids) == 0:
        LoggingManager().log(
            "Optimizer Utils:: bind_tuple_expr: \
            Cannot bind column name provided", LoggingLevel.ERROR)
        # Bug fix: bail out here. Previously execution fell through to the
        # pop() below, which raised on an empty/non-list binding and masked
        # the error that was just logged.
        return
    expr.col_metadata_id = column_ids.pop()
Exemplo n.º 12
0
def bind_table_ref(video_info: TableInfo) -> int:
    """Grab the catalog entry id for the input video table.

    Arguments:
        video_info {TableInfo} -- [input parsed video info]
    Return:
        catalog_entry for input table
    """
    entry_id, _ = CatalogManager().get_table_bindings(
        video_info.database_name, video_info.table_name, None)
    return entry_id
Exemplo n.º 13
0
    def test_table_binding_without_columns_returns_no_column_ids(
            self, dcs_mock, ds_mock, initdb_mock):
        # Binding a table without columns must not query the column service
        # and should return an empty column-id list.
        manager = CatalogManager()
        name, db = "name", "database"

        result = manager.get_table_bindings(db, name)

        dataset_lookup = ds_mock.return_value.dataset_by_name
        dataset_lookup.assert_called_with(name)

        dcs_mock.return_value.columns_by_dataset_id_and_names \
            .assert_not_called()

        self.assertEqual(result, (dataset_lookup.return_value, []))
Exemplo n.º 14
0
    def test_get_dataset_metadata_when_table_doesnot_exists(
            self, dcs_mock, ds_mock, initdb_mock):
        # A missing table should yield None without ever touching the
        # column service.
        manager = CatalogManager()
        ds_mock.return_value.dataset_object_by_name.return_value = None

        result = manager.get_dataset_metadata("database", "name")

        ds_mock.return_value.dataset_object_by_name.assert_called_with(
            "database", "name")
        dcs_mock.return_value.columns_by_id_and_dataset_id.assert_not_called()
        self.assertEqual(result, None)
Exemplo n.º 15
0
def create_column_metadata(col_list: List[ColumnDefinition]):
    """Create column metadata for the input parsed column list.

    This function will not commit the provided columns into the catalog
    table; it only returns an in-memory list of ColumnDataframe objects.

    Arguments:
        col_list {List[ColumnDefinition]} -- parsed col list to be created
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            LoggingManager().log(
                "Empty column while creating column metadata",
                LoggingLevel.ERROR)
            result_list.append(col)
            # Bug fix: skip the catalog call below, which previously
            # dereferenced the None column and raised AttributeError.
            continue
        result_list.append(
            CatalogManager().create_column_metadata(
                col.name, col.type, col.array_type, col.dimension
            )
        )

    return result_list
Exemplo n.º 16
0
def column_definition_to_udf_io(
        col_list: List[ColumnDefinition], is_input: bool):
    """Create a UdfIO object for each column definition provided.

    Arguments:
        col_list(List[ColumnDefinition]): parsed input/output definitions
        is_input(bool): true if input else false
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            LoggingManager().log(
                "Empty column definition while creating udf io",
                LoggingLevel.ERROR)
            result_list.append(col)
            # Bug fix: skip the catalog call below, which previously
            # dereferenced the None column and raised AttributeError.
            continue
        result_list.append(
            CatalogManager().udf_io(col.name, col.type,
                                    array_type=col.array_type,
                                    dimensions=col.dimension,
                                    is_input=is_input)
        )
    return result_list
Exemplo n.º 17
0
    def test_should_load_video_in_table(self):
        # Loading a video should make its frames readable back from storage.
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        perform_query(query)

        meta = CatalogManager().get_dataset_metadata("", "MyVideo")
        loaded = list(StorageEngine.read(meta))
        self.assertEqual(loaded, list(create_dummy_batches()))
Exemplo n.º 18
0
    def test_should_load_video_in_table(self):
        # After loading, the concatenated storage contents (sorted, since
        # read order is not guaranteed) must match the dummy batches.
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(query)

        metadata = CatalogManager().get_dataset_metadata("", "MyVideo")
        # Fix: dropped the dead `Batch(pd.DataFrame())` assignment that was
        # immediately overwritten by the concat result.
        actual_batch = Batch.concat(StorageEngine.read(metadata), copy=False)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches())
        self.assertEqual([actual_batch], expected_batch)
Exemplo n.º 19
0
    def test_delete_column_metadata(self, dcs_mock, ds_mock, initdb_mock):
        # delete_column_metadata pipeline: name -> dataset id -> column ids
        # -> column objects -> delete.
        name = "name"
        cols = ["column1", "column2"]

        CatalogManager().delete_column_metadata(name, cols)

        dataset_lookup = ds_mock.return_value.dataset_by_name
        dataset_lookup.assert_called_with(name)

        ids_lookup = dcs_mock.return_value.columns_by_dataset_id_and_names
        ids_lookup.assert_called_with(dataset_lookup.return_value, cols)

        objects_lookup = dcs_mock.return_value.columns_by_id_and_dataset_id
        objects_lookup.assert_called_with(dataset_lookup.return_value,
                                          ids_lookup.return_value)

        dcs_mock.return_value.delete_column.assert_called_with(
            objects_lookup.return_value)
Exemplo n.º 20
0
    def test_table_binding_returns_metadata_and_column_ids(
            self, dcs_mock, ds_mock, initdb_mock):
        # Binding with explicit columns returns the dataset id together with
        # the resolved column ids.
        manager = CatalogManager()
        name, db = "name", "database"
        cols = ["column1", "column2"]

        result = manager.get_table_bindings(db, name, cols)

        dataset_lookup = ds_mock.return_value.dataset_by_name
        dataset_lookup.assert_called_with(name)

        column_lookup = dcs_mock.return_value.columns_by_dataset_id_and_names
        column_lookup.assert_called_with(dataset_lookup.return_value, cols)

        self.assertEqual(result, (dataset_lookup.return_value,
                                  column_lookup.return_value))
Exemplo n.º 21
0
    def test_dataset_by_name_should_return_name_of_model(
            self, dcs_mock, ds_mock, initdb_mock):
        # Exercises dataset_by_name in df_service.py via the manager's
        # internal dataset service.
        manager = CatalogManager()
        file_url = "file1"
        set_name = "test_name"

        columns = [(DataFrameColumn("column", ColumnType.INTEGER))]
        manager.create_metadata(set_name, file_url, columns)
        for col in columns:
            col.metadata_id = \
                ds_mock.return_value.create_dataset.return_value.id

        actual = manager._dataset_service.dataset_by_name(set_name)
        ds_mock.return_value.dataset_by_name.assert_called_with(set_name)

        self.assertEqual(
            ds_mock.return_value.dataset_by_name.return_value, actual)
Exemplo n.º 22
0
    def test_get_dataset_metadata_when_table_exists(self, dcs_mock, ds_mock,
                                                    initdb_mock):
        # When the table exists, the metadata object comes back with its
        # schema populated from the column service.
        manager = CatalogManager()
        schema = [1, 2, 3]
        dataset_id = 1  # renamed from `id` to avoid shadowing the builtin
        metadata_obj = MagicMock(id=dataset_id, schema=None)
        ds_mock.return_value.dataset_object_by_name.return_value = metadata_obj
        dcs_mock.return_value \
            .columns_by_id_and_dataset_id.return_value = schema

        result = manager.get_dataset_metadata("database", "name")

        ds_mock.return_value.dataset_object_by_name.assert_called_with(
            "database", "name")
        dcs_mock.return_value.columns_by_id_and_dataset_id.assert_called_with(
            dataset_id, None)
        self.assertEqual(result.id, dataset_id)
        self.assertEqual(result.schema, schema)
Exemplo n.º 23
0
    def test_create_metadata_should_create_dataset_and_columns(
            self, dcs_mock, ds_mock, initdb_mock):
        # create_metadata must create the dataset row, point each column at
        # it, create the columns, and return the dataset with its schema set.
        manager = CatalogManager()
        url = "file1"
        name = "name"
        columns = [(DataFrameColumn("c1", ColumnType.INTEGER))]

        result = manager.create_metadata(name, url, columns)

        ds_mock.return_value.create_dataset.assert_called_with(
            name, url, identifier_id='id')
        for col in columns:
            col.metadata_id = \
                ds_mock.return_value.create_dataset.return_value.id

        dcs_mock.return_value.create_column.assert_called_with(columns)

        expected = ds_mock.return_value.create_dataset.return_value
        expected.schema = \
            dcs_mock.return_value.create_column.return_value

        self.assertEqual(result, expected)
Exemplo n.º 24
0
    def test_should_load_video_in_table(self):
        # Frames accumulated from storage (sorted) must equal the dummy
        # batches produced by the fixture.
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        perform_query(query)

        meta = CatalogManager().get_dataset_metadata("", "MyVideo")
        combined = Batch(pd.DataFrame())
        for chunk in StorageEngine.read(meta):
            combined += chunk
        combined.sort()
        self.assertEqual([combined], list(create_dummy_batches()))
Exemplo n.º 25
0
    def setUp(self):
        # Start every test from a clean slate: wipe the catalog, create the
        # sample video fixture, load it into a table, and register the dummy
        # detector UDF the tests query against.
        CatalogManager().reset()
        create_sample_video(NUM_FRAMES)
        load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(load_query)

        create_udf_query = """CREATE UDF DummyObjectDetector
                  INPUT  (Frame_Array NDARRAY UINT8(3, 256, 256))
                  OUTPUT (label NDARRAY STR(10))
                  TYPE  Classification
                  IMPL  'test/util.py';
        """
        execute_query_fetch_all(create_udf_query)
Exemplo n.º 26
0
    def exec(self):
        """Create udf executor.

        Calls the catalog to create udf metadata from the plan node's
        inputs, outputs, and implementation path.
        """
        if (self.node.if_not_exists):
            # TODO: check catalog if it already has this udf entry
            return
        io_list = []
        io_list.extend(self.node.inputs)
        io_list.extend(self.node.outputs)
        impl_path = self.node.impl_path.absolute().as_posix()
        # Fix: the returned metadata was previously bound to an unused
        # local (`udf_metadata`); register the udf without the dead variable.
        CatalogManager().create_udf(self.node.name, impl_path,
                                    self.node.udf_type, io_list)
Exemplo n.º 27
0
    def test_create_plan(self):
        # A CreatePlan should capture the table ref, column list, and the
        # if-not-exists flag, and report the CREATE operator type.
        table_ref = TableRef(TableInfo('dummy'))

        CatalogManager().reset()
        schema = [DataFrameColumn('id', ColumnType.INTEGER),
                  DataFrameColumn('name', ColumnType.TEXT,
                                  array_dimensions=50)]
        plan = CreatePlan(table_ref, schema, False)

        self.assertEqual(plan.opr_type, PlanOprType.CREATE)
        self.assertEqual(plan.if_not_exists, False)
        self.assertEqual(plan.video_ref.table.table_name,
                         "dummy")
        self.assertEqual(plan.column_list[0].name, "id")
        self.assertEqual(plan.column_list[1].name, "name")
Exemplo n.º 28
0
    def exec(self):
        """Create table executor.

        Creates catalog metadata for the table, then asks the storage
        engine to materialize a dataframe for it.
        """
        if (self.node.if_not_exists):
            # TODO: check catalog if we already have this table
            return

        name = self.node.video_ref.table_info.table_name
        url = str(generate_file_path(name))
        metadata = CatalogManager().create_metadata(
            name, url, self.node.column_list)

        StorageEngine.create(table=metadata)
Exemplo n.º 29
0
    def exec(self):
        """Create table executor (spark path).

        Creates catalog metadata for the table, builds a spark dataframe
        from it, and returns the generated file url.
        """
        if (self.node.if_not_exists):
            # TODO: check catalog if we already have this table
            return
        # Generate a file_url to be used for the table. This hard-codes a
        # temp-dir path for now; should be replaced by an auto-generator.
        name = self.node.video_ref.table_info.table_name
        file_url = 'file://' + os.path.join(tempfile.gettempdir(), name)
        metadata = CatalogManager().create_metadata(
            name, file_url, self.node.column_list)

        create_dataframe(metadata)
        return file_url
Exemplo n.º 30
0
    def exec(self):
        """Insert executor.

        Builds a tuple from the plan node's column/value lists (assuming no
        missing values), casts it to the table schema, and writes it out.
        """
        evaluated = []
        for column, value_expr in zip(self.node.column_list,
                                      self.node.value_list):
            result = value_expr.evaluate()
            result.frames.columns = [column.col_name]
            evaluated.append(result)

        batch = Batch.merge_column_wise(evaluated)
        metadata = CatalogManager().get_metadata(self.node.video_id)
        # Verify value types are consistent with the table schema before
        # writing.
        batch.frames = SchemaUtils.petastorm_type_cast(
            metadata.schema.petastorm_schema, batch.frames)
        StorageEngine.write(metadata, batch)