def test__transform_constraints_raises_error(self):
    """Test that method raises error when specified.

    The ``_transform_constraints`` method is expected to raise
    ``MissingConstraintColumnError`` if the constraint transform raises one
    and ``on_missing_column`` is set to error.

    Input:
    - Table data

    Side Effects:
    - MissingConstraintColumnError
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.transform.side_effect = MissingConstraintColumnError
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run/Assert
    with pytest.raises(MissingConstraintColumnError):
        Table._transform_constraints(table_mock, data, 'error')

def __init__(self, field_names=None, field_types=None, anonymize_fields=None,
             primary_key=None, entity_columns=None, context_columns=None,
             sequence_index=None, segment_size=None, context_model=None,
             table_metadata=None):
    if table_metadata is None:
        self._metadata = Table(
            field_names=field_names,
            primary_key=primary_key,
            field_types=field_types,
            anonymize_fields=anonymize_fields,
            dtype_transformers=self._DTYPE_TRANSFORMERS,
            sequence_index=sequence_index,
            entity_columns=entity_columns,
            context_columns=context_columns,
        )
        self._metadata_fitted = False
    else:
        null_args = (
            field_names,
            primary_key,
            field_types,
            anonymize_fields,
            sequence_index,
            entity_columns,
            context_columns
        )
        for arg in null_args:
            if arg:
                raise ValueError(
                    'If table_metadata is given {} must be None'.format(arg.__name__))

        if isinstance(table_metadata, dict):
            table_metadata = Table.from_dict(
                table_metadata,
                dtype_transformers=self._DTYPE_TRANSFORMERS,
            )

        self._metadata = table_metadata
        self._metadata_fitted = table_metadata.fitted

    # Validate arguments
    if segment_size is not None and not isinstance(segment_size, int):
        if sequence_index is None:
            raise TypeError(
                '`segment_size` must be of type `int` if '
                'no `sequence_index` is given.'
            )

        segment_size = pd.to_timedelta(segment_size)

    self._context_columns = self._metadata._context_columns
    self._entity_columns = self._metadata._entity_columns
    self._sequence_index = self._metadata._sequence_index
    self._segment_size = segment_size

    context_model = context_model or 'gaussian_copula'
    if isinstance(context_model, str):
        context_model = self._CONTEXT_MODELS[context_model]

    self._context_model_template = context_model

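# Illustrative usage sketch (not part of the original source): assuming the
# ``__init__`` above belongs to SDV's base timeseries model shared by ``PAR``,
# the sequence-related arguments would typically be supplied as shown below.
# The column names ('store_id', 'region', 'date') are hypothetical.
#
#   from sdv.timeseries import PAR
#
#   model = PAR(
#       entity_columns=['store_id'],   # one sequence per store
#       context_columns=['region'],    # constant within each sequence
#       sequence_index='date',         # orders the rows of each sequence
#       segment_size='30 days',        # only valid here because sequence_index is given
#   )
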
def test_fit_constraint_transform_errors(self):
    """Test the ``fit`` method when constraints error on transform.

    The ``fit`` method should loop through all the constraints and try to fit
    them. Then it should loop through again and try to transform. If any
    errors are raised, they should be caught and surfaced together.

    Setup:
    - Set the ``_constraints`` to be a list of mocked constraints.
    - Set constraint mocks to raise Exceptions when calling transform.

    Input:
    - A ``pandas.DataFrame``.

    Side effect:
    - A ``MultipleConstraintsErrors`` error should be raised.
    """
    # Setup
    data = pd.DataFrame({'a': [1, 2, 3]})
    instance = Table()
    constraint1 = Mock()
    constraint2 = Mock()
    constraint1.transform.side_effect = Exception('error 1')
    constraint2.transform.side_effect = Exception('error 2')
    instance._constraints = [constraint1, constraint2]

    # Run / Assert
    error_message = re.escape('\nerror 1\n\nerror 2')
    with pytest.raises(MultipleConstraintsErrors, match=error_message):
        instance.fit(data)

    constraint1.fit.assert_called_once_with(data)
    constraint2.fit.assert_called_once_with(data)

def test__transform_constraints_is_condition_false_returns_data(self):
    """Test that the method returns the data unchanged when necessary.

    The ``_transform_constraints`` method is expected to return the data
    unchanged when the constraint transform raises a
    ``MissingConstraintColumnError`` and the ``is_condition`` flag is False.

    Input:
    - Table data

    Output:
    - Table data, unchanged
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.transform.side_effect = MissingConstraintColumnError(missing_columns=[])
    constraint_mock.constraint_columns = ['item 0']
    table_instance = Table()
    table_instance._constraints = [constraint_mock]
    table_instance._constraints_to_reverse = [constraint_mock]

    # Run
    result = table_instance._transform_constraints(data, False)

    # Assert
    expected_result = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    assert result.equals(expected_result)
    assert table_instance._constraints_to_reverse == []

def test__prepare_constraints_invalid_order_raises_exception(self, from_dict_mock):
    """Test that the ``_prepare_constraints`` method validates the constraint order.

    If one constraint has ``rebuild_columns`` that are in a later constraint's
    ``constraint_columns``, an exception should be raised.

    Input:
    - List of constraints with some having ``rebuild_columns`` that are in a
      later constraint's ``constraint_columns``.

    Side Effect:
    - Exception should be raised.
    """
    # Setup
    constraint1 = Constraint(handling_strategy='reject_sampling')
    constraint2 = Constraint(handling_strategy='reject_sampling')
    constraint3 = Constraint(handling_strategy='transform')
    constraint4 = Constraint(handling_strategy='transform')
    constraints = [constraint1, constraint2, constraint3, constraint4]
    constraint3.rebuild_columns = ['a', 'd']
    constraint4.constraint_columns = ['a', 'b', 'c']
    constraint4.rebuild_columns = ['a']
    from_dict_mock.side_effect = [constraint1, constraint2, constraint3, constraint4]

    # Run
    with pytest.raises(Exception):
        Table._prepare_constraints(constraints)

def __init__(self, field_names=None, field_types=None, field_transformers=None,
             anonymize_fields=None, primary_key=None, constraints=None,
             table_metadata=None):
    if table_metadata is None:
        self._metadata = Table(
            field_names=field_names,
            primary_key=primary_key,
            field_types=field_types,
            anonymize_fields=anonymize_fields,
            constraints=constraints,
            dtype_transformers=self._DTYPE_TRANSFORMERS,
        )
        self._metadata_fitted = False
    else:
        for arg in (field_names, primary_key, field_types, anonymize_fields, constraints):
            if arg:
                raise ValueError(
                    'If table_metadata is given {} must be None'.format(arg.__name__))

        if isinstance(table_metadata, dict):
            table_metadata = Table.from_dict(table_metadata)

        self._metadata = table_metadata
        self._metadata_fitted = table_metadata.fitted

def test__make_anonymization_mappings(self, mock_table):
    """Test that ``_make_anonymization_mappings`` creates the expected mappings.

    The ``_make_anonymization_mappings`` method should map values in the
    original data to fake values for non-id fields that are labeled pii.

    Setup:
    - Create a Table that has metadata about three fields (one pii field,
      one id field, and one non-pii field).

    Input:
    - Data that contains a pii field, an id field, and a non-pii field.

    Side Effects:
    - Expect ``_get_fake_values`` to be called with the number of unique values
      of the pii field.
    - Expect the resulting ``_ANONYMIZATION_MAPPINGS`` field to contain the pii
      field, with the correct number of mappings and keys.
    """
    # Setup
    metadata = Mock()
    metadata._ANONYMIZATION_MAPPINGS = {}
    foo_metadata = {
        'type': 'categorical',
        'pii': True,
        'pii_category': 'email',
    }
    metadata._fields_metadata = {
        'foo': foo_metadata,
        'bar': {
            'type': 'categorical',
        },
        'baz': {
            'type': 'id',
        }
    }
    foo_values = ['*****@*****.**', '*****@*****.**', '*****@*****.**']
    data = pd.DataFrame({
        'foo': foo_values,
        'bar': ['a', 'b', 'c'],
        'baz': [1, 2, 3],
    })

    # Run
    Table._make_anonymization_mappings(metadata, data)

    # Assert
    assert mock_table._get_fake_values.called_once_with(foo_metadata, 3)

    mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
    assert len(mappings) == 1

    foo_mappings = mappings['foo']
    assert len(foo_mappings) == 3
    assert list(foo_mappings.keys()) == foo_values

def test__transform_constraints_drops_columns(self):
    """Test that method drops columns when specified.

    The ``_transform_constraints`` method is expected to drop columns
    associated with a constraint if its transform raises a
    ``MissingConstraintColumnError`` and ``on_missing_column`` is set to drop.

    Input:
    - Table data

    Output:
    - Table with dropped columns
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.transform.side_effect = MissingConstraintColumnError
    constraint_mock.constraint_columns = ['item 0']
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run
    result = Table._transform_constraints(table_mock, data, 'drop')

    # Assert
    expected_result = pd.DataFrame({'item 1': [3, 4, 5]}, index=[0, 1, 2])
    assert result.equals(expected_result)

def test__make_ids_unique_field_index_out_of_order(self):
    """Test that the updated id column is unique even if the index is out of order."""
    metadata_dict = {
        'fields': {
            'item 0': {
                'type': 'id',
                'subtype': 'integer'
            },
            'item 1': {
                'type': 'boolean'
            }
        },
        'primary_key': 'item 0'
    }
    metadata = Table.from_dict(metadata_dict)
    data = pd.DataFrame({
        'item 0': [0, 1, 1, 2, 3, 5, 5, 6],
        'item 1': [True, True, False, False, True, False, False, True]
    }, index=[0, 1, 1, 2, 3, 5, 5, 6])

    new_data = metadata.make_ids_unique(data)

    assert new_data['item 1'].equals(data['item 1'])
    assert new_data['item 0'].is_unique

def test___init__(self, transformer_mock):
    """Test that the ``__init__`` method passes parameters.

    The ``__init__`` method should pass the custom parameters
    to the ``NumericalTransformer``.

    Input:
    - rounding set to an int
    - max_value set to an int
    - min_value set to an int

    Side Effects:
    - ``NumericalTransformer`` should receive the correct parameters
    """
    # Run
    Table(rounding=-1, max_value=100, min_value=-50)

    # Asserts
    assert len(transformer_mock.mock_calls) == 2
    transformer_mock.assert_any_call(
        dtype=int, rounding=-1, max_value=100, min_value=-50)
    transformer_mock.assert_any_call(
        dtype=float, rounding=-1, max_value=100, min_value=-50)

def test__prepare_constraints_validates_constraint_order(self, from_dict_mock):
    """Test that the ``_prepare_constraints`` method validates the constraint order.

    If no constraint has ``rebuild_columns`` that are in a later constraint's
    ``constraint_columns``, no exception should be raised.

    Input:
    - List of constraints with none having ``rebuild_columns`` that are in a
      later constraint's ``constraint_columns``.

    Output:
    - Sorted list of constraints.
    """
    # Setup
    constraint1 = Constraint(handling_strategy='reject_sampling')
    constraint2 = Constraint(handling_strategy='reject_sampling')
    constraint3 = Constraint(handling_strategy='transform')
    constraint4 = Constraint(handling_strategy='transform')
    constraints = [constraint1, constraint2, constraint3, constraint4]
    constraint3.rebuild_columns = ['e', 'd']
    constraint4.constraint_columns = ['a', 'b', 'c']
    constraint4.rebuild_columns = ['a']
    from_dict_mock.side_effect = [constraint1, constraint2, constraint3, constraint4]

    # Run
    sorted_constraints = Table._prepare_constraints(constraints)

    # Assert
    assert sorted_constraints == constraints

def test__prepare_constraints_sorts_constraints_none_rebuild_columns(self, from_dict_mock):
    """Test that the ``_prepare_constraints`` method sorts constraints.

    The ``_prepare_constraints`` method should sort constraints with None as
    ``rebuild_columns`` before those that have them.

    Input:
    - List of constraints with some having None as ``rebuild_columns``,
      listed after those with ``rebuild_columns``.

    Output:
    - List of constraints sorted properly.
    """
    # Setup
    constraint1 = Constraint(handling_strategy='transform')
    constraint2 = Constraint(handling_strategy='transform')
    constraint3 = Constraint(handling_strategy='reject_sampling')
    constraints = [constraint1, constraint2, constraint3]
    constraint1.rebuild_columns = ['a']
    constraint2.rebuild_columns = ['b']
    constraint3.rebuild_columns = None
    from_dict_mock.side_effect = [constraint1, constraint2, constraint3]

    # Run
    sorted_constraints = Table._prepare_constraints(constraints)

    # Asserts
    assert sorted_constraints == [constraint3, constraint1, constraint2]

def test__validate_data_on_constraints(self):
    """Test the ``Table._validate_data_on_constraints`` method.

    Expect that the method returns None when the constraint columns are in the
    given data and ``constraint.is_valid`` returns True for every row.

    Input:
    - Table data

    Output:
    - None

    Side Effects:
    - No error
    """
    # Setup
    data = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.is_valid.return_value = pd.Series([True, True, True])
    constraint_mock.constraint_columns = ['a', 'b']
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run
    result = Table._validate_data_on_constraints(table_mock, data)

    # Assert
    assert result is None

def test__validate_data_on_constraints_missing_cols(self):
    """Test the ``Table._validate_data_on_constraints`` method.

    Expect that the method returns None when the constraint columns are not
    in the given data.

    Input:
    - Table data that is missing a constraint column

    Output:
    - None

    Side Effects:
    - No error
    """
    # Setup
    data = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.constraint_columns = ['a', 'b', 'c']
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run
    result = Table._validate_data_on_constraints(table_mock, data)

    # Assert
    assert result is None

def __init__(self, field_names=None, field_types=None, field_transformers=None,
             anonymize_fields=None, primary_key=None, constraints=None,
             table_metadata=None, distribution=None, default_distribution=None,
             categorical_transformer=None):
    if isinstance(table_metadata, dict):
        table_metadata = Table.from_dict(table_metadata)

    if table_metadata:
        model_kwargs = table_metadata.get_model_kwargs(self.__class__.__name__)
        if model_kwargs:
            if distribution is None:
                distribution = model_kwargs['distribution']

            if categorical_transformer is None:
                categorical_transformer = model_kwargs['categorical_transformer']

    self._distribution = distribution
    self._default_distribution = default_distribution or 'parametric'

    categorical_transformer = categorical_transformer or self._DEFAULT_TRANSFORMER
    self._categorical_transformer = categorical_transformer
    self._DTYPE_TRANSFORMERS = {'O': categorical_transformer}

    super().__init__(
        field_names=field_names,
        primary_key=primary_key,
        field_types=field_types,
        anonymize_fields=anonymize_fields,
        constraints=constraints,
        table_metadata=table_metadata
    )

def __init__(self, field_names=None, field_types=None, field_transformers=None,
             anonymize_fields=None, primary_key=None, constraints=None,
             table_metadata=None, field_distributions=None,
             default_distribution=None, categorical_transformer=None):
    if isinstance(table_metadata, dict):
        table_metadata = Table.from_dict(table_metadata)

    if table_metadata:
        model_kwargs = table_metadata.get_model_kwargs(self.__class__.__name__)
        if model_kwargs:
            if field_distributions is None:
                field_distributions = model_kwargs['field_distributions']

            if default_distribution is None:
                default_distribution = model_kwargs['default_distribution']

            if categorical_transformer is None:
                categorical_transformer = model_kwargs['categorical_transformer']

    if field_distributions and not isinstance(field_distributions, dict):
        raise TypeError('field_distributions can only be None or a dict instance')

    self._field_distributions = {
        field: self._validate_distribution(distribution)
        for field, distribution in (field_distributions or {}).items()
    }
    self._default_distribution = (
        self._validate_distribution(default_distribution) or self._DEFAULT_DISTRIBUTION)

    self._categorical_transformer = categorical_transformer or self._DEFAULT_TRANSFORMER
    self._DTYPE_TRANSFORMERS = {'O': self._categorical_transformer}

    super().__init__(
        field_names=field_names,
        field_types=field_types,
        field_transformers=field_transformers,
        anonymize_fields=anonymize_fields,
        primary_key=primary_key,
        constraints=constraints,
        table_metadata=table_metadata,
    )

    self._metadata.set_model_kwargs(self.__class__.__name__, {
        'field_distributions': field_distributions,
        'default_distribution': default_distribution,
        'categorical_transformer': categorical_transformer,
    })

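# Illustrative usage sketch (not part of the original source): assuming the
# ``__init__`` above is the ``GaussianCopula`` model's constructor, the
# distribution-related arguments would typically be passed as shown below.
# The field name 'age' is hypothetical.
#
#   from sdv.tabular import GaussianCopula
#
#   model = GaussianCopula(
#       field_distributions={'age': 'beta'},       # per-field override
#       default_distribution='gaussian',           # fallback for other fields
#       categorical_transformer='label_encoding',  # used for 'O' dtype columns
#   )
#
# The values end up validated through ``_validate_distribution`` and stored in
# the metadata via ``set_model_kwargs``, so re-loading the metadata restores them.
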
def test__get_faker_specified_locales_list(self):
    """Test that ``_get_faker`` with a locales parameter sets localization correctly.

    The ``_get_faker`` method should return a Faker object localized to the
    specified locales.

    Input:
    - Field metadata from metadata dict.

    Output:
    - Faker object with specified list of localizations.
    """
    # Setup
    metadata_dict = {
        'fields': {
            'foo': {
                'type': 'categorical',
                'pii': True,
                'pii_category': 'company',
                'pii_locales': ['en_US', 'sv_SE']
            }
        }
    }

    # Run
    faker = Table.from_dict(metadata_dict)._get_faker(metadata_dict['fields']['foo'])

    # Assert
    assert isinstance(faker, Faker)
    assert faker.locales == ['en_US', 'sv_SE']

def test__get_faker_default_locale(self):
    """Test that ``_get_faker`` without a locales parameter uses the default locale.

    The ``_get_faker`` method should return a Faker object localized to the
    default locale when no locales are specified explicitly.

    Input:
    - Field metadata from metadata dict.

    Output:
    - Faker object with default localization.
    """
    # Setup
    metadata_dict = {
        'fields': {
            'foo': {
                'type': 'categorical',
                'pii': True,
                'pii_category': 'company'
            }
        }
    }

    # Run
    faker = Table.from_dict(metadata_dict)._get_faker(metadata_dict['fields']['foo'])

    # Assert
    assert isinstance(faker, Faker)
    assert faker.locales == [DEFAULT_LOCALE]

def test_from_dict_min_max(self):
    """Test the ``Table.from_dict`` method.

    Expect that when min_value and max_value are not provided,
    they are set to 'auto'.

    Input:
    - A dictionary representing a table's metadata

    Output:
    - A Table object
    """
    # Setup
    metadata_dict = {
        'fields': {
            'item 0': {'type': 'id', 'subtype': 'integer'},
            'item 1': {'type': 'boolean'}
        },
        'primary_key': 'item 0'
    }

    # Run
    metadata = Table.from_dict(metadata_dict)

    # Assert
    assert metadata._transformer_templates['integer'].max_value == 'auto'
    assert metadata._transformer_templates['integer'].min_value == 'auto'
    assert metadata._transformer_templates['integer'].rounding == 'auto'
    assert metadata._transformer_templates['float'].max_value == 'auto'
    assert metadata._transformer_templates['float'].min_value == 'auto'
    assert metadata._transformer_templates['float'].rounding == 'auto'

def test__transform_constraints_is_condition_drops_columns(self):
    """Test that method drops columns when necessary.

    The ``_transform_constraints`` method is expected to drop columns
    associated with a constraint when its transform raises a
    ``MissingConstraintColumnError`` and the ``is_condition`` flag is True.

    Input:
    - Table data
    - ``is_condition`` set to True

    Output:
    - Table with dropped columns
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.transform.side_effect = MissingConstraintColumnError(missing_columns=[])
    constraint_mock.constraint_columns = ['item 0']
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run
    result = Table._transform_constraints(table_mock, data, True)

    # Assert
    expected_result = pd.DataFrame({
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    assert result.equals(expected_result)

def test__get_faker_method_pass_args(self):
    """Test that ``_get_faker_method`` utilizes the parameters passed in the category argument.

    The ``_get_faker_method`` method uses the parameters passed to it in the
    category argument.

    Input:
    - Faker object to create faked values with.
    - Category tuple of category name and parameters passed to the method
      creating fake values.

    Output:
    - Fake values created with the specified method from the Faker object,
      utilizing the arguments given to it.
    """
    # Setup
    metadata_dict = {
        'fields': {
            'foo': {
                'type': 'categorical',
                'pii': True,
                'pii_category': 'ean'
            }
        }
    }
    metadata = Table.from_dict(metadata_dict)

    # Run
    fake_8_ean = metadata._get_faker_method(Faker(), ('ean', 8))
    ean_8 = fake_8_ean()

    fake_13_ean = metadata._get_faker_method(Faker(), ('ean', 13))
    ean_13 = fake_13_ean()

    # Assert
    assert len(ean_8) == 8
    assert len(ean_13) == 13

def test___init__calls_prepare_constraints(self, _prepare_constraints_mock):
    """Test that the ``__init__`` method calls ``_prepare_constraints``."""
    # Run
    Table(constraints=[])

    # Assert
    _prepare_constraints_mock.assert_called_once_with([])

def test_fit_constraint_transform_missing_columns_error(self, warnings_mock):
    """Test the ``fit`` method when transform raises errors.

    The ``fit`` method should loop through all the constraints and try to fit
    them. Then it should loop through again and try to transform. If a
    ``MissingConstraintColumnError`` or ``FunctionError`` is raised, a warning
    should be raised and reject sampling should be used.

    Setup:
    - Set the ``_constraints`` to be a list of mocked constraints.
    - Set constraint mocks to raise ``MissingConstraintColumnError`` and
      ``FunctionError`` when calling transform.
    - Mock the warnings module.

    Input:
    - A ``pandas.DataFrame``.

    Side effect:
    - ``MissingConstraintColumnError`` and ``FunctionError`` warning messages.
    """
    # Setup
    data = pd.DataFrame({'a': [1, 2, 3]})
    instance = Table()
    constraint1 = Mock()
    constraint2 = Mock()
    constraint3 = Mock()
    constraint1.transform.return_value = data
    constraint2.transform.side_effect = MissingConstraintColumnError(['column'])
    constraint3.transform.side_effect = FunctionError()
    instance._constraints = [constraint1, constraint2, constraint3]

    # Run
    instance.fit(data)

    # Assert
    constraint1.fit.assert_called_once_with(data)
    constraint2.fit.assert_called_once_with(data)
    constraint3.fit.assert_called_once_with(data)
    assert warnings_mock.warn.call_count == 2
    warning_message1 = (
        "Mock cannot be transformed because columns: ['column'] were not found. Using the "
        'reject sampling approach instead.'
    )
    warning_message2 = 'Error transforming Mock. Using the reject sampling approach instead.'
    warnings_mock.warn.assert_has_calls([call(warning_message1), call(warning_message2)])

def _fit_metadata(self, data):
    """Generate a new Table metadata and fit it to the data.

    The information provided on instantiation will be used to create the Table
    instance, and the rest of the information will be learned from the given data.

    Args:
        data (pandas.DataFrame):
            Data to learn from.
    """
    metadata = Table(
        field_names=self._field_names,
        primary_key=self._primary_key,
        field_types=self._field_types,
        anonymize_fields=self._anonymize_fields,
        transformer_templates=self.TRANSFORMER_TEMPLATES,
    )
    metadata.fit(data)

    self._metadata = metadata

def test_transform_calls__transform_constraints(self):
    """Test that ``transform`` calls ``_transform_constraints`` with the right parameters.

    The ``transform`` method is expected to call the ``_transform_constraints``
    method with the data and the correct value for ``on_missing_column``.

    Input:
    - Table data

    Side Effects:
    - Calls ``_transform_constraints``
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [True, True, False]
    }, index=[0, 1, 2])
    dtypes = {'item 0': 'int', 'item 1': 'bool'}
    table_mock = Mock()
    table_mock.get_dtypes.return_value = dtypes
    table_mock._transform_constraints.return_value = data
    table_mock._anonymize.return_value = data
    table_mock._hyper_transformer.transform.return_value = data

    # Run
    Table.transform(table_mock, data, 'error')

    # Assert
    expected_data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [True, True, False]
    }, index=[0, 1, 2])
    mock_calls = table_mock._transform_constraints.mock_calls
    args = mock_calls[0][1]
    assert len(mock_calls) == 1
    assert args[0].equals(expected_data)
    assert args[1] == 'error'

def test__make_anonymization_mappings_unique_faked_value_in_field(self, mock_table):
    """Test that ``_make_anonymization_mappings`` creates mappings for anonymized values.

    The ``_make_anonymization_mappings`` method should map equal values in the
    original data to the same faked value.

    Input:
    - DataFrame with a field that should be anonymized based on the metadata
      description.

    Side Effect:
    - Mappings are created from the original values to faked values.
    """
    # Setup
    metadata = Mock()
    metadata._ANONYMIZATION_MAPPINGS = {}
    foo_metadata = {
        'type': 'categorical',
        'pii': True,
        'pii_category': 'email'
    }
    metadata._fields_metadata = {'foo': foo_metadata}
    data = pd.DataFrame({
        'foo': ['*****@*****.**', '*****@*****.**', '*****@*****.**']
    })

    # Run
    Table._make_anonymization_mappings(metadata, data)

    # Assert
    assert mock_table._get_fake_values.called_once_with(foo_metadata, 2)

    mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
    assert len(mappings) == 1

    foo_mappings = mappings['foo']
    assert len(foo_mappings) == 2
    assert list(foo_mappings.keys()) == ['*****@*****.**', '*****@*****.**']

def __init__(self, field_names=None, field_types=None, field_transformers=None,
             anonymize_fields=None, primary_key=None, constraints=None,
             table_metadata=None, rounding='auto', min_value='auto', max_value='auto'):
    if table_metadata is None:
        self._metadata = Table(
            field_names=field_names,
            primary_key=primary_key,
            field_types=field_types,
            field_transformers=field_transformers,
            anonymize_fields=anonymize_fields,
            constraints=constraints,
            dtype_transformers=self._DTYPE_TRANSFORMERS,
            rounding=rounding,
            min_value=min_value,
            max_value=max_value
        )
        self._metadata_fitted = False
    else:
        table_metadata = deepcopy(table_metadata)
        for arg in (field_names, primary_key, field_types, anonymize_fields, constraints):
            if arg:
                raise ValueError(
                    'If table_metadata is given {} must be None'.format(arg.__name__))

        if isinstance(table_metadata, dict):
            table_metadata = Table.from_dict(table_metadata)

        table_metadata._dtype_transformers.update(self._DTYPE_TRANSFORMERS)

        self._metadata = table_metadata
        self._metadata_fitted = table_metadata.fitted

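# Illustrative usage sketch (not part of the original source): assuming the
# ``__init__`` above is the shared constructor of the SDV tabular models
# (e.g. ``CTGAN``), the rounding and clipping arguments would be forwarded to
# the Table metadata roughly like this; 'user_id' is a hypothetical column.
#
#   from sdv.tabular import CTGAN
#
#   model = CTGAN(
#       primary_key='user_id',
#       rounding=2,        # round sampled numerical values to 2 decimals
#       min_value=None,    # do not clip sampled values at a lower bound
#       max_value='auto',  # clip at the maximum observed during fitting
#   )
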
def test__transform_constraints(self):
    """Test that the method correctly transforms data based on constraints.

    The ``_transform_constraints`` method is expected to loop through
    constraints and call each constraint's ``transform`` method on the data.

    Input:
    - Table data

    Output:
    - Transformed data
    """
    # Setup
    data = pd.DataFrame({
        'item 0': [0, 1, 2],
        'item 1': [3, 4, 5]
    }, index=[0, 1, 2])
    transformed_data = pd.DataFrame({
        'item 0': [0, 0.5, 1],
        'item 1': [6, 8, 10]
    }, index=[0, 1, 2])
    first_constraint_mock = Mock()
    second_constraint_mock = Mock()
    first_constraint_mock.transform.return_value = transformed_data
    second_constraint_mock.transform.return_value = transformed_data
    table_instance = Table()
    table_instance._constraints = [first_constraint_mock, second_constraint_mock]

    # Run
    result = table_instance._transform_constraints(data)

    # Assert
    assert result.equals(transformed_data)
    first_constraint_mock.transform.assert_called_once_with(data)
    second_constraint_mock.transform.assert_called_once_with(transformed_data)
    assert table_instance._constraints_to_reverse == [
        first_constraint_mock, second_constraint_mock
    ]

def test_fit_fits_and_transforms_constraints(self):
    """Test the ``fit`` method.

    The ``fit`` method should loop through all the constraints, fit them,
    and then call ``transform`` for all of them.

    Setup:
    - Set the ``_constraints`` to be a list of mocked constraints.

    Input:
    - A ``pandas.DataFrame``.

    Output:
    - Same ``pandas.DataFrame``.

    Side effect:
    - Each constraint should be fit and transform the data.
    """
    # Setup
    data = pd.DataFrame({'a': [1, 2, 3]})
    transformed_data = pd.DataFrame({'a': [4, 5, 6]})
    instance = Table()
    constraint1 = Mock()
    constraint2 = Mock()
    constraint1.transform.return_value = transformed_data
    constraint2.transform.return_value = data
    instance._constraints = [constraint1, constraint2]

    # Run
    instance.fit(data)

    # Assert
    constraint1.fit.assert_called_once_with(data)
    constraint2.fit.assert_called_once_with(data)
    constraint1.transform.assert_called_once_with(data)
    constraint2.transform.assert_called_once_with(transformed_data)

def test__validate_data_on_constraints_invalid_input(self):
    """Test the ``Table._validate_data_on_constraints`` method.

    Expect that a ``ConstraintsNotMetError`` is raised when the constraint
    columns are in the given data and ``constraint.is_valid`` returns False
    for any row.

    Input:
    - Table data containing an invalid row

    Side Effects:
    - A ConstraintsNotMetError is thrown
    """
    # Setup
    data = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}, index=[0, 1, 2])
    constraint_mock = Mock()
    constraint_mock.is_valid.return_value = pd.Series([True, False, True])
    constraint_mock.constraint_columns = ['a', 'b']
    table_mock = Mock()
    table_mock._constraints = [constraint_mock]

    # Run and assert
    with pytest.raises(ConstraintsNotMetError):
        Table._validate_data_on_constraints(table_mock, data)