示例#1
0
    def test___init__default_metadata_dict(self, mock_meta, mock_relationships):
        """Build ``Metadata`` from a dict and check the resulting defaults.

        ``mock_meta`` and ``mock_relationships`` are injected from outside this
        method (presumably by ``patch`` decorators -- confirm against the class).
        The test expects the first to be called exactly once with the input dict
        and the second exactly once with no arguments.
        """
        # Run
        instance = Metadata({'some': 'meta'})

        # Asserts: attributes left at their default values
        assert instance.root_path == '.'
        assert instance._hyper_transformers == {}

        # Asserts: each patched collaborator was invoked exactly once
        mock_relationships.assert_called_once_with()
        mock_meta.assert_called_once_with({'some': 'meta'})
示例#2
0
    def test_get_foreign_keys(self):
        """Check ``get_foreign_keys`` for a child with two references to one parent.

        The ``child`` table declares two ``id`` fields whose ``ref`` points at the
        ``parent`` table; both field names are expected back, in declaration order.
        """
        # Setup
        parent_meta = {
            'fields': {
                'parent_id': {'type': 'id'},
            },
            'primary_key': 'parent_id',
        }
        child_meta = {
            'fields': {
                'parent_id': {
                    'type': 'id',
                    'ref': {'table': 'parent', 'field': 'id'},
                },
                'parent_id_2': {
                    'type': 'id',
                    'ref': {'table': 'parent', 'field': 'id'},
                },
            },
        }
        metadata = Metadata({
            'tables': {
                'parent': parent_meta,
                'child': child_meta,
            },
        })

        # Run
        foreign_keys = Metadata.get_foreign_keys(metadata, 'parent', 'child')

        # Asserts
        expected = ['parent_id', 'parent_id_2']
        assert foreign_keys == expected
示例#3
0
文件: evaluation.py 项目: sdv-dev/SDV
def _validate_arguments(synthetic_data, real_data, metadata, root_path,
                        table_name):
    """Validate and normalize the arguments needed to compute descriptors values.

    If ``metadata`` is a ``str`` or a ``dict``, build the ``Metadata`` object from it.
    If ``metadata`` is ``None``, ``real_data`` has to be a ``pandas.DataFrame`` and a new
    ``Metadata`` with a single table called ``table_name`` is created from it.

    If ``real_data`` is ``None`` load all the tables and assert that ``synthetic_data`` is
    a ``dict``. Otherwise, ``real_data`` and ``synthetic_data`` must be of the same type.

    If ``synthetic_data`` or ``real_data`` is not a ``dict``, wrap it in a dictionary
    keyed by ``table_name``.

    ``synthetic_data`` and ``real_data`` must contain the same tables. If they cover
    fewer tables than the metadata describes, the metadata is reduced to those tables.

    Args:
        synthetic_data (dict or pandas.DataFrame):
            Synthesized data.
        real_data (dict, pandas.DataFrame or None):
            Real data.
        metadata (str, dict, Metadata or None):
            Metadata instance or details needed to build it.
        root_path (str):
            Passed to ``Metadata`` as its second argument when ``metadata`` is given
            as a ``str`` or a ``dict``.
        table_name (str):
            Table name used to prepare the metadata object, real_data and synthetic_data dict.

    Returns:
        tuple (dict, dict, dict):
            Synthetic tables, real tables and the result of ``metadata.to_dict()``.

    Raises:
        TypeError:
            If ``metadata`` is ``None`` and ``real_data`` is not a ``pandas.DataFrame``,
            if ``real_data`` is ``None`` and ``synthetic_data`` is not a ``dict``, or if
            ``synthetic_data`` is not an instance of the type of ``real_data``.
        ValueError:
            If ``real_data`` and ``synthetic_data`` do not have the same table names.
    """
    if isinstance(metadata, (str, dict)):
        # A ``str`` is a documented input type; without converting it here it would
        # fail later with an AttributeError on ``load_tables`` / ``get_tables``.
        metadata = Metadata(metadata, root_path)
    elif metadata is None:
        if not isinstance(real_data, pd.DataFrame):
            raise TypeError(
                'If metadata is None, `real_data` has to be a DataFrame')

        metadata = Metadata()
        metadata.add_table(table_name, data=real_data)

    if real_data is None:
        real_data = metadata.load_tables()
        if not isinstance(synthetic_data, dict):
            raise TypeError(
                'If `real_data` is `None`, `synthetic_data` must be a dict')

    elif not isinstance(synthetic_data, type(real_data)):
        raise TypeError(
            '`real_data` and `synthetic_data` must be of the same type')

    # Single tables are wrapped so that the rest of the code only deals with dicts.
    if not isinstance(synthetic_data, dict):
        synthetic_data = {table_name: synthetic_data}

    if not isinstance(real_data, dict):
        real_data = {table_name: real_data}

    if set(real_data) != set(synthetic_data):
        raise ValueError(
            'real_data and synthetic dataset must have the same tables')

    # Keep only the metadata of the tables that are actually being evaluated.
    if len(real_data) < len(metadata.get_tables()):
        meta_dict = {
            table: metadata.get_table_meta(table)
            for table in real_data
        }
        metadata = Metadata({'tables': meta_dict})

    return synthetic_data, real_data, metadata.to_dict()