def create_video_metadata(name: str) -> DataFrameMetadata:
    """Create and register catalog metadata for a video table.

    The schema is fixed for video tables:
        id:   INTEGER frame id (unique)
        data: UINT8 NDARRAY holding the frame pixels

    Arguments:
        name (str): name under which the metadata is registered

    Returns:
        DataFrameMetadata: metadata object created for the video table
    """
    id_column = ColumnDefinition(
        'id', ColumnType.INTEGER, None, [], ColConstraintInfo(unique=True))
    # The frame dimensions are unknown at creation time, so every ndarray
    # axis is left as None. Either the user provides them at load time or
    # they have to be inferred from the provided video.
    data_column = ColumnDefinition(
        'data', ColumnType.NDARRAY, NdArrayType.UINT8, [None, None, None])

    column_metadata = create_column_metadata([id_column, data_column])
    file_url = str(generate_file_path(name))
    return CatalogManager().create_metadata(
        name, file_url, column_metadata, identifier_column='id')
def test_should_return_a_randon_full_path(self, mock_conf):
    """generate_file_path yields an absolute path under the configured
    dataset root (with a random filename) and raises KeyError when the
    dataset location is not configured."""
    config = MagicMock()
    mock_conf.return_value = config
    config.get_value.return_value = 'eva_datasets'

    dataset_root = Path('eva_datasets').resolve()
    result = generate_file_path('test')
    self.assertTrue(result.is_absolute())
    # Parent must be the configured root directory; the filename is random.
    self.assertTrue(dataset_root.match(str(result.parent)))

    config.get_value.return_value = None
    self.assertRaises(KeyError, generate_file_path)
def exec(self):
    """Create table executor.

    Registers metadata for the table with the catalog, then asks the
    storage engine to create a spark dataframe from that metadata.
    """
    if self.node.if_not_exists:
        # NOTE(review): this returns unconditionally; presumably it should
        # first check the catalog for an existing table — confirm intent.
        return

    table_name = self.node.video_ref.table_info.table_name
    file_url = str(generate_file_path(table_name))
    metadata = CatalogManager().create_metadata(
        table_name, file_url, self.node.column_list)
    StorageEngine.create(table=metadata)