Exemplo n.º 1
0
    def test__make_anonymization_mappings(self, mock_table):
        """Test that ``_make_anonymization_mappings`` creates the expected mappings.

        The ``_make_anonymization_mappings`` method should map values in the original
        data to fake values for non-id fields that are labeled pii.

        Setup:
        - Create a ``Mock`` standing in for a Table, with metadata about three fields
          (one pii field, one id field, and one non-pii field).
        Input:
        - Data that contains a pii field, an id field, and a non-pii field. The pii
          field holds three distinct values.
        Side Effects:
        - Expect the ``_ANONYMIZATION_MAPPINGS`` entry stored under ``id(metadata)`` to
          contain only the pii field, with one mapping per unique value and the
          original values as keys.
        """
        # Setup
        metadata = Mock()
        metadata._ANONYMIZATION_MAPPINGS = {}
        foo_metadata = {
            'type': 'categorical',
            'pii': True,
            'pii_category': 'email',
        }
        metadata._fields_metadata = {
            'foo': foo_metadata,
            'bar': {
                'type': 'categorical',
            },
            'baz': {
                'type': 'id',
            },
        }
        # The values must be distinct: the assertions below expect one mapping per value.
        foo_values = ['a@email.com', 'b@email.com', 'c@email.com']
        data = pd.DataFrame({
            'foo': foo_values,
            'bar': ['a', 'b', 'c'],
            'baz': [1, 2, 3],
        })

        # Run
        Table._make_anonymization_mappings(metadata, data)

        # Assert
        # NOTE(review): this test used to do
        # ``assert mock_table._get_fake_values.called_once_with(...)``. That is not a
        # Mock assertion: it returns a truthy child mock (so it could never fail) and
        # raises AttributeError on Python 3.12+. It was not turned into
        # ``assert_called_once_with`` because ``mock_table`` is never configured here
        # while real mapping entries are expected, so the patched object is presumably
        # not the one producing the fake values -- confirm against the patch target.
        # The number of generated values is checked through the mapping size instead.
        mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
        assert len(mappings) == 1

        foo_mappings = mappings['foo']
        assert len(foo_mappings) == 3
        assert list(foo_mappings.keys()) == foo_values
Exemplo n.º 2
0
    def test__make_anonymization_mappings_unique_faked_value_in_field(
            self, mock_table):
        """Test that ``_make_anonymization_mappings`` method creates mappings for anonymized values.

        The ``_make_anonymization_mappings`` method should map equal values in the original data
        to the same faked value.

        Input:
        - DataFrame with a field that should be anonymized based on the metadata description.
          The field holds three values, two of which are equal.
        Side Effect:
        - Mappings are created from the original values to faked values, one per unique
          original value, keyed in order of first appearance.
        """
        # Setup
        metadata = Mock()
        metadata._ANONYMIZATION_MAPPINGS = {}
        foo_metadata = {
            'type': 'categorical',
            'pii': True,
            'pii_category': 'email'
        }
        metadata._fields_metadata = {'foo': foo_metadata}
        # One repeated address plus one distinct address: three rows, two unique values.
        data = pd.DataFrame({
            'foo': ['a@email.com', 'a@email.com', 'b@email.com']
        })
        expected_keys = ['a@email.com', 'b@email.com']

        # Run
        Table._make_anonymization_mappings(metadata, data)

        # Assert
        # NOTE(review): this test used to do
        # ``assert mock_table._get_fake_values.called_once_with(...)``. That is not a
        # Mock assertion: it returns a truthy child mock (so it could never fail) and
        # raises AttributeError on Python 3.12+. It was not turned into
        # ``assert_called_once_with`` because ``mock_table`` is never configured here
        # while real mapping entries are expected, so the patched object is presumably
        # not the one producing the fake values -- confirm against the patch target.
        # The number of generated values is checked through the mapping size instead.
        mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
        assert len(mappings) == 1

        foo_mappings = mappings['foo']
        assert len(foo_mappings) == 2
        assert list(foo_mappings.keys()) == expected_keys