Exemplo n.º 1
0
    def test__valid_separator_non_valid_separator_contained(self):
        """Check that ``_valid_separator`` rejects a separator found in the data.

        The separator is set to ``'#'`` and one value of column ``'b'`` is exactly
        ``'#'``, so the check is expected to come back falsy.

        Input:
        - Table data (pandas.DataFrame) where a constrained column contains ``'#'``.
        Output:
        - False (bool).
        """
        # Setup
        data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', '#', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint._separator = '#'

        # Run
        result = constraint._valid_separator(data)

        # Assert
        assert not result
Exemplo n.º 2
0
    def test_is_valid_false(self):
        """Check ``UniqueCombinations.is_valid`` on rows with unseen combinations.

        The constraint is fitted on lowercase values for ``'b'`` and then asked to
        validate a table whose ``'b'`` values are uppercase, so none of the
        ``('b', 'c')`` pairs match the fitted table.

        Input:
        - Table data (pandas.DataFrame) that does not satisfy the constraint.
        Output:
        - Series of ``False`` values (pandas.Series) named ``'b#c'``.
        Side effects:
        - ``fit`` is called first, so ``is_valid`` runs on a fitted instance.
        """
        # Setup
        fitted_data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint.fit(fitted_data)

        # Run
        unseen_data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['D', 'E', 'F'],
            'c': ['g', 'h', 'i'],
        })
        result = constraint.is_valid(unseen_data)

        # Assert
        expected = pd.Series([False] * 3, name='b#c')
        pd.testing.assert_series_equal(expected, result)
Exemplo n.º 3
0
    def test__valid_separator_valid(self):
        """Check that ``_valid_separator`` accepts a separator absent from the data.

        Neither the values of the constrained columns nor the existing column
        names involve ``'#'``, so the check is expected to come back truthy.

        Input:
        - Table data (pandas.DataFrame)
        Output:
        - True (bool).
        """
        # Setup
        data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint._separator = '#'

        # Run
        result = constraint._valid_separator(data)

        # Assert
        assert result
Exemplo n.º 4
0
    def test_reverse_transform(self):
        """Test the ``UniqueCombinations.reverse_transform`` method.

        It is expected to return the original data separating the concatenated columns.

        Input:
        - Table data transformed (pandas.DataFrame)
        Output:
        - Original table data, with the concatenated columns separated (pandas.DataFrame)
        Side effects:
        - ``fit`` is called on the untransformed table first, so the instance carries
        the separator and joint column name before ``reverse_transform`` is run.
        """
        # Setup
        # Fit on the original columns. The already-joined table has no ``'b'``/``'c'``
        # columns and already contains a ``'b#c'`` column, which the separator check
        # treats as a name collision.
        table_data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i']
        })
        columns = ['b', 'c']
        instance = UniqueCombinations(columns=columns)
        instance.fit(table_data)

        transformed_data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b#c': ['d#g', 'e#h', 'f#i']
        })

        # Run
        out = instance.reverse_transform(transformed_data)

        # Assert
        expected_out = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i']
        })
        pd.testing.assert_frame_equal(expected_out, out)
Exemplo n.º 5
0
    def test_is_valid_true(self):
        """Check ``UniqueCombinations.is_valid`` on the very table it was fitted on.

        Every ``('b', 'c')`` pair in the validated table comes from the fitted
        table, so all rows are expected to be reported as valid.

        Input:
        - Table data (pandas.DataFrame), satisfying the constraint.
        Output:
        - Series of ``True`` values (pandas.Series) named ``'b#c'``.
        Side effects:
        - ``fit`` is called first, so ``is_valid`` runs on a fitted instance.
        """
        # Setup
        data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint.fit(data)

        # Run
        result = constraint.is_valid(data)

        # Assert
        expected = pd.Series([True] * 3, name='b#c')
        pd.testing.assert_series_equal(expected, result)
Exemplo n.º 6
0
    def test_fit(self):
        """Check the state left on the instance by ``UniqueCombinations.fit``.

        After fitting, the instance is expected to hold:
        - ``'#'`` as the separator.
        - ``'b#c'`` as the joint column name.
        - The ``'b'``/``'c'`` combinations of the fitted table.

        Input:
        - Table data (pandas.DataFrame)
        """
        # Setup
        data = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])

        # Run
        constraint.fit(data)

        # Asserts
        assert constraint._separator == '#'
        assert constraint._joint_column == 'b#c'

        combinations = pd.DataFrame({
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        pd.testing.assert_frame_equal(constraint._combinations, combinations)
Exemplo n.º 7
0
    def test__valid_separator_non_valid_name_joined_exists(self):
        """Check that ``_valid_separator`` rejects a colliding joint column name.

        Joining ``'b'`` and ``'c'`` with ``'#'`` gives ``'b#c'``, which is already
        a column of the table, so the check is expected to come back falsy.

        Input:
        - Table data (pandas.DataFrame) that already has a column named like the
        result of joining the constrained column names with the separator.
        Output:
        - False (bool).
        """
        # Setup
        data = pd.DataFrame({
            'b#c': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint._separator = '#'

        # Run
        result = constraint._valid_separator(data)

        # Assert
        assert not result
Exemplo n.º 8
0
    def test_to_dict(self):
        """Check the dict produced by ``to_dict`` on a ``UniqueCombinations`` instance.

        The dict is expected to contain the fully qualified name of the
        constraint together with the arguments that were passed to build it.

        Output:
        - Dict with the right values.
        """
        # Setup
        constraint = UniqueCombinations(
            columns=['a', 'b'],
            handling_strategy='transform',
        )

        # Run
        result = constraint.to_dict()

        # Assert
        expected = {
            'constraint': 'sdv.constraints.tabular.UniqueCombinations',
            'handling_strategy': 'transform',
            'columns': ['a', 'b'],
        }
        assert result == expected
Exemplo n.º 9
0
    def test___init__(self):
        """Check that ``UniqueCombinations.__init__`` stores the column names.

        The constraint is built from the names of the columns whose combinations
        must be unique, and those names are expected to end up on the instance.

        Side effects:
        - instance._columns == columns
        """
        # Setup
        column_names = ['b', 'c']

        # Run
        constraint = UniqueCombinations(columns=column_names)

        # Assert
        assert constraint._columns == column_names
Exemplo n.º 10
0
    def test_transform_not_all_columns_provided(self):
        """Check ``UniqueCombinations.transform`` on a table missing constrained columns.

        The constraint is fitted on a table with ``'b'`` and ``'c'`` and then asked
        to transform a table that only has ``'a'``; this is expected to raise a
        ``MissingConstraintColumnError``.

        Input:
        - Table data (pandas.DataFrame)
        Output:
        - Raises ``MissingConstraintColumnError``.
        """
        # Setup
        full_table = pd.DataFrame({
            'a': ['a', 'b', 'c'],
            'b': ['d', 'e', 'f'],
            'c': ['g', 'h', 'i'],
        })
        constraint = UniqueCombinations(columns=['b', 'c'])
        constraint.fit(full_table)
        incomplete_table = pd.DataFrame({'a': ['a', 'b', 'c']})

        # Run/Assert
        with pytest.raises(MissingConstraintColumnError):
            constraint.transform(incomplete_table)