def test___init__default_metadata_dict(self, mock_meta, mock_relationships):
    """Build a ``Metadata`` from a plain dict and check its default state.

    Verifies that ``mock_meta`` receives the input dict exactly once, that
    ``mock_relationships`` is called exactly once without arguments, that
    ``root_path`` is ``'.'`` and that ``_hyper_transformers`` is an empty dict.
    """
    # Setup
    raw_meta = {'some': 'meta'}

    # Run
    instance = Metadata(raw_meta)

    # Asserts
    mock_meta.assert_called_once_with({'some': 'meta'})
    mock_relationships.assert_called_once_with()
    assert instance.root_path == '.'
    assert instance._hyper_transformers == {}
def test_get_foreign_keys(self):
    """Check ``get_foreign_keys`` for a child table with two references to its parent.

    The child declares two ``id`` fields whose ``ref`` points at the ``parent``
    table; both field names are expected back, in declaration order.
    """
    # Setup
    parent_table = {
        'fields': {
            'parent_id': {'type': 'id'},
        },
        'primary_key': 'parent_id',
    }
    child_table = {
        'fields': {
            'parent_id': {
                'type': 'id',
                'ref': {'table': 'parent', 'field': 'id'},
            },
            'parent_id_2': {
                'type': 'id',
                'ref': {'table': 'parent', 'field': 'id'},
            },
        },
    }
    tables = {'parent': parent_table, 'child': child_table}
    metadata = Metadata({'tables': tables})

    # Run
    foreign_keys = Metadata.get_foreign_keys(metadata, 'parent', 'child')

    # Asserts
    expected = ['parent_id', 'parent_id_2']
    assert foreign_keys == expected
def _validate_arguments(synthetic_data, real_data, metadata, root_path, table_name):
    """Validate arguments needed to compute descriptors values.

    If ``metadata`` is a ``str`` or a ``dict``, create the ``Metadata`` object
    from it. If ``metadata`` is ``None``, ``real_data`` has to be a
    ``pandas.DataFrame`` and a single-table ``Metadata`` is built from it.

    If ``real_data`` is ``None`` load all the tables and require that
    ``synthetic_data`` is a ``dict``. Otherwise, ``real_data`` and
    ``synthetic_data`` must be of the same type.

    If ``synthetic_data`` or ``real_data`` is not a ``dict``, wrap it in a
    dictionary keyed by ``table_name``.

    ``synthetic_data`` and ``real_data`` must have the same tables. When they
    cover fewer tables than ``metadata`` describes, the metadata is narrowed
    down to those tables.

    Args:
        synthetic_data (dict or pandas.DataFrame):
            Synthesized data.
        real_data (dict, pandas.DataFrame or None):
            Real data.
        metadata (str, dict, Metadata or None):
            Metadata instance or details needed to build it. A ``str`` is
            handed to the ``Metadata`` constructor together with ``root_path``
            (presumably a path to the metadata file -- confirm against
            ``Metadata``).
        root_path (str):
            Path to the metadata file.
        table_name (str):
            Table name used to prepare the metadata object, real_data and
            synthetic_data dict.

    Returns:
        tuple (dict, dict, dict):
            Processed synthetic tables, processed real tables and the result
            of calling ``to_dict()`` on the metadata object.

    Raises:
        TypeError:
            If ``metadata`` is ``None`` and ``real_data`` is not a DataFrame,
            if ``real_data`` is ``None`` and ``synthetic_data`` is not a dict,
            or if ``real_data`` and ``synthetic_data`` are of different types.
        ValueError:
            If ``real_data`` and ``synthetic_data`` do not have the same tables.
    """
    if isinstance(metadata, (str, dict)):
        # ``str`` is a documented input; without this conversion it would reach
        # the ``metadata.<method>()`` calls below and fail with AttributeError.
        metadata = Metadata(metadata, root_path)
    elif metadata is None:
        if not isinstance(real_data, pd.DataFrame):
            raise TypeError(
                'If metadata is None, `real_data` has to be a DataFrame')

        metadata = Metadata()
        metadata.add_table(table_name, data=real_data)

    if real_data is None:
        real_data = metadata.load_tables()
        if not isinstance(synthetic_data, dict):
            raise TypeError(
                'If `real_data` is `None`, `synthetic_data` must be a dict')

    elif not isinstance(synthetic_data, type(real_data)):
        raise TypeError(
            '`real_data` and `synthetic_data` must be of the same type')

    # Normalize single tables into ``{table_name: table}`` mappings.
    if not isinstance(synthetic_data, dict):
        synthetic_data = {table_name: synthetic_data}

    if not isinstance(real_data, dict):
        real_data = {table_name: real_data}

    if set(real_data) != set(synthetic_data):
        raise ValueError(
            'real_data and synthetic dataset must have the same tables')

    # Only a subset of the described tables was passed: keep just their metadata.
    if len(real_data) < len(metadata.get_tables()):
        meta_dict = {
            table: metadata.get_table_meta(table)
            for table in real_data
        }
        metadata = Metadata({'tables': meta_dict})

    return synthetic_data, real_data, metadata.to_dict()