def validate(self, entity):
    """
    Run the generic state validations on a single entity.

    Checks that are being performed:
    - begin_geldigheid checks are delegated to _validate_begin_geldigheid
    - volgnummer should be at least 1 and must not have been seen before
      for the same identificatie (both are logged as errors)
    - only one entity per identificatie may have an empty eind_geldigheid
      (logged as a warning)

    A failing volgnummer check sets self.validated to False.

    :param entity: a GOB entity
    :return: None
    """
    def seqnr_error(check):
        # Every volgnummer problem is an error that invalidates the collection
        log_issue(logger, QA_LEVEL.ERROR,
                  Issue(check, entity, self.source_id, FIELD.SEQNR))
        self.validated = False

    self._validate_begin_geldigheid(entity)

    # volgnummer should be a positive number ...
    seqnr = entity[FIELD.SEQNR]
    if seqnr < 1:
        seqnr_error(QA_CHECK.Format_numeric)

    # ... and unique within the entities that share the same identificatie
    identificatie = str(entity[self.source_id])
    seen_seqnrs = self.volgnummers[identificatie]
    if seqnr in seen_seqnrs:
        seqnr_error(QA_CHECK.Value_unique)

    # Only one eind_geldigheid may be empty per identificatie
    if entity[FIELD.END_VALIDITY] is None:
        if self.end_date.get(identificatie):
            log_issue(logger, QA_LEVEL.WARNING,
                      Issue(QA_CHECK.Value_empty_once, entity, self.source_id, FIELD.END_VALIDITY))
        self.end_date[identificatie] = True

    # Remember the volgnummer for this identificatie
    self.volgnummers[identificatie].add(seqnr)
def test_state_attributes(self):
    """An Issue exposes the seqnr and the ISO-formatted validity dates of its entity."""
    def make_issue(source):
        return Issue({'id': 'any_check', 'msg': 'any msg'}, source, 'id', 'attr')

    entity = {
        'id': 'any id',
        'attr': 'any attr',
        FIELD.SEQNR: 'any seqnr',
        FIELD.START_VALIDITY: '2006-01-20',
        FIELD.END_VALIDITY: '2006-01-20 12:35',
    }
    issue = make_issue(entity)
    self.assertEqual(getattr(issue, FIELD.SEQNR), entity[FIELD.SEQNR])
    self.assertEqual(getattr(issue, FIELD.START_VALIDITY), '2006-01-20T00:00:00')
    self.assertEqual(getattr(issue, FIELD.END_VALIDITY), '2006-01-20T12:35:00')

    # Dates, datetimes and date strings all end up in the same ISO format
    start_values = [
        datetime.date(2020, 1, 20),
        datetime.datetime(2020, 1, 20),
        '20200120',
        '2020-01-20',
    ]
    for start_value in start_values:
        entity[FIELD.START_VALIDITY] = start_value
        issue = make_issue(entity)
        self.assertEqual(getattr(issue, FIELD.START_VALIDITY), '2020-01-20T00:00:00')

    # Without state fields in the entity, all state attributes are None
    stateless_entity = {'id': 'any id', 'attr': 'any attr'}
    issue = make_issue(stateless_entity)
    for state_attr in (FIELD.SEQNR, FIELD.START_VALIDITY, FIELD.END_VALIDITY):
        self.assertEqual(getattr(issue, state_attr), None)
def test_issue(self):
    """An Issue exposes its check, entity id, attribute, value and compared-to data."""
    entity = {'id': 'any id', 'attr': 'any attr'}

    def make_issue(*args):
        # A fresh check dict for every issue, always built on the same entity
        return Issue({'id': 'any_check'}, entity, *args)

    issue = make_issue('id', 'attr')
    self.assertEqual(issue.check, {'id': 'any_check'})
    self.assertEqual(issue.entity_id_attribute, 'id')
    self.assertEqual(issue.entity_id, entity['id'])
    self.assertIsNone(getattr(issue, FIELD.SEQNR))
    self.assertEqual(issue.attribute, 'attr')
    self.assertEqual(issue.value, entity['attr'])
    self.assertIsNone(issue.compared_to)
    self.assertIsNone(issue.compared_to_value)

    # No id attribute given: the default id attribute is used and no id is found
    issue = make_issue(None, 'attr')
    self.assertEqual(issue.entity_id_attribute, Issue._DEFAULT_ENTITY_ID)
    self.assertIsNone(issue.entity_id)

    # The seqnr is taken from the entity when present
    entity[FIELD.SEQNR] = 'any seqnr'
    issue = make_issue(None, 'attr')
    self.assertEqual(getattr(issue, FIELD.SEQNR), 'any seqnr')

    # compared_to is registered, its value stays None while the entity lacks it
    issue = make_issue(None, 'attr', 'other attr')
    self.assertEqual(issue.compared_to, 'other attr')
    self.assertIsNone(issue.compared_to_value)

    # Once the entity holds the compared attribute, its value is picked up
    entity['other attr'] = 'any other value'
    issue = make_issue(None, 'attr', 'other attr')
    self.assertEqual(issue.compared_to, 'other attr')
    self.assertEqual(issue.compared_to_value, 'any other value')
def _query_missing(query, check, attr):
    """
    Report the rows returned by a query as missing values for an attribute

    Rows without an eind_geldigheid are logged individually as warnings.
    Rows with an eind_geldigheid are only counted; the total is reported
    in a single data info message.

    :param query: query to execute
    :param check: the QA check that has failed for every returned row
    :param attr: name of the attribute that has the failing bronwaarde
    :return: None
    """
    historic_count = 0
    for row in _get_data(query):
        if row.get('eind_geldigheid') is not None:
            # Historic rows are only counted
            historic_count += 1
            continue

        # The row is the entity in error, its id-attribute is named id.
        # The failing value is found under bronwaarde, but it is reported
        # under the name of the attribute itself.
        issue = Issue(check, row, 'id', 'bronwaarde')
        issue.attribute = attr
        log_issue(logger, QA_LEVEL.WARNING, issue)

    items_name = f"{attr} {check['msg']}"
    if historic_count > 0:
        logger.data_info(f"{items_name}: {historic_count} historical errors")
def _validate_begin_geldigheid(self, entity):
    """
    Validate the begin_geldigheid of an entity

    An empty begin_geldigheid is logged as an error and sets self.validated to False.
    A begin_geldigheid that lies after a filled eind_geldigheid is logged as a warning.

    :param entity: a GOB entity
    :return: None
    """
    start_validity = entity[FIELD.START_VALIDITY]
    if not start_validity:
        log_issue(logger, QA_LEVEL.ERROR,
                  Issue(QA_CHECK.Value_not_empty, entity, self.source_id, FIELD.START_VALIDITY))
        self.validated = False
        return

    end_validity = entity[FIELD.END_VALIDITY]
    if end_validity and start_validity > end_validity:
        # Start-Validity cannot be after End-Validity
        log_issue(logger, QA_LEVEL.WARNING,
                  Issue(QA_CHECK.Value_not_after, entity, self.source_id, FIELD.START_VALIDITY,
                        compared_to=FIELD.END_VALIDITY))
def test_format_value(self):
    """_format_value maps None to the no-value marker and other values to their string form."""
    issue = Issue({'id': 'any_check'}, {'id': 'any id', 'attr': 'any attr'}, 'id', 'attr')

    self.assertEqual(issue._format_value(None), Issue._NO_VALUE)
    self.assertEqual(issue._format_value(1), "1")
def check_relation_conflicts(catalog_name, collection_name, attribute_name):
    """
    Log a warning for every conflicting relation of the given attribute

    The conflicts are retrieved from a Relater. Only rows with a row_number
    greater than 1 are reported.

    :param catalog_name: catalog of the relation source
    :param collection_name: collection of the relation source
    :param attribute_name: name of the relation attribute, used as the attribute of the issue
    :return: None
    """
    relater = Relater(catalog_name, collection_name, attribute_name)

    for record in relater.get_conflicts():
        conflict = dict(record)
        row_number = conflict.get("row_number") or 0
        if row_number > 1:
            # The issue reads the volgnummer of the source entity from 'volgnummer'
            conflict['volgnummer'] = conflict.get('src_volgnummer')
            issue = Issue(QA_CHECK.Unique_destination, conflict, 'src_id', 'bronwaarde')
            issue.attribute = attribute_name
            log_issue(logger, QA_LEVEL.WARNING, issue)
def test_json(self):
    """An Issue serialises to the expected JSON and can be restored from it."""
    source = {
        'id': 'any id',
        'attr': 'any attr',
        'compared attr': 'any compared value',
    }
    issue = Issue({'id': 'any_check'}, source, 'id', 'attr', 'compared attr')

    expected_json = '{"check": {"id": "any_check"}, "entity": {"id": "any id", "volgnummer": null, "begin_geldigheid": null, "eind_geldigheid": null, "attr": "any attr"}, "id_attribute": "id", "attribute": "attr", "compared_to": "compared attr", "compared_to_value": "any compared value"}'
    self.assertEqual(expected_json, issue.json)

    # Round trip: the restored issue reports the same value
    restored = Issue.from_json(expected_json)
    self.assertEqual(restored.value, issue.value)
def test_get_validity(self):
    """_get_validity returns an ISO datetime string, or None when the value is not a date."""
    entity = {'id': 'any id', 'validity': '2020-05-22'}
    issue = Issue({'id': 'any_check'}, entity, 'id', 'validity')

    cases = [
        ('2020-05-22', '2020-05-22T00:00:00'),
        (datetime.date(year=1020, month=5, day=22), '1020-05-22T00:00:00'),
        # Conversion fails, set to None
        ('non date', None),
    ]
    for raw_value, expected in cases:
        entity['validity'] = raw_value
        self.assertEqual(issue._get_validity(entity, 'validity'), expected)
def test_sorted_value(self):
    """Joining issues yields a sorted, comma separated value."""
    issues = []
    for value in [1, 8, 7, 5, 9]:
        entity = {'id': 'any_id', 'attr': value}
        issues.append(Issue({'id': 'any_check'}, entity, 'id', 'attr'))

    # Join all other issues into the first one
    first, *others = issues
    for other in others:
        first.join_issue(other)

    self.assertEqual(first.value, '1, 5, 7, 8, 9')
def add_issue(self, issue, level):
    """
    Register an issue in the offload file

    The offload file is opened on first use. For every unique issue id the
    value returned by write_issue (the byte offset of the written line) is
    kept in self._issues. When an issue with the same unique id was already
    written, that line is read back, the new issue is joined into it and the
    joined issue is appended to the end of the file; self._issues then
    points to the joined line.

    The data message counter for the level is incremented for newly
    registered issues only.

    :param issue: the issue to register
    :param level: the level name, used as suffix of the 'data_' message counter
    :return: None
    """
    if not self._offload_file:
        self.open_offload_file()

    issue_id = issue.get_unique_id()

    # Compare with None: a byte offset of 0 (the first line of the file) is a valid position
    issue_offset = self._issues.get(issue_id)
    if issue_offset is not None:
        from gobcore.quality.issue import Issue

        # Seek the line of the existing issue and strip its newline
        self._offload_file.seek(issue_offset)
        existing_issue_string = self._offload_file.readline().rstrip("\n")

        # Join this issue with the already existing issue for the same check, attribute and entity
        existing_issue = Issue.from_json(existing_issue_string)
        existing_issue.join_issue(issue)

        # Return to the end of the file and write the joined issue
        self._offload_file.seek(0, io.SEEK_END)
        self._issues[issue_id] = self.write_issue(existing_issue)
    else:
        # Write this issue and save the byte offset to the issue
        self._issues[issue_id] = self.write_issue(issue)
        self._data_msg_count['data_' + level] += 1
def _check_gebruiksdoel_plus(self, entity, gebruiksdoelen):
    """
    The value of the gebruiksdoel_plus (woonfunctie or gezondheidszorgfunctie) may only be filled if
    gebruiksdoel is either woonfunctie or gezondheidszorgfunctie.

    For both gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie a warning is logged
    when its omschrijving is filled while the matching value is not in gebruiksdoelen.
    A missing or empty (None) attribute is treated as not filled.

    :param entity: the entity to check
    :param gebruiksdoelen: the gebruiksdoel values to match against
    :return: None
    """
    qa_checks = {
        'woonfunctie': QA_CHECK.Value_gebruiksdoel_woonfunctie_should_match,
        'gezondheidszorgfunctie': QA_CHECK.Value_gebruiksdoel_gezondheidszorgfunctie_should_match,
    }

    # Check both woonfunctie and gezondheidszorgfunctie
    for check_value, qa_check in qa_checks.items():
        attribute_name = f'gebruiksdoel_{check_value}'
        # The attribute may be present with value None, fall back to an empty dict
        attribute_value = (entity.get(attribute_name) or {}).get('omschrijving')

        if attribute_value and check_value not in gebruiksdoelen:
            log_issue(
                logger, QA_LEVEL.WARNING,
                Issue(qa_check, entity, self.source_id, attribute_name, compared_to='gebruiksdoel'))
def test_issue_fails(self):
    """Creating an Issue for an empty check raises an IssueException."""
    source = {'id': 'any id', 'attr': 'any attr'}
    empty_check = {}

    with self.assertRaises(IssueException):
        Issue(empty_check, source, 'id', 'attr')
def validate_pand(self, entity):
    """
    Validate pand

    Checks that are being performed:
    - aantal_bouwlagen should match the number of bouwlagen that is derived
      from hoogste_bouwlaag and laagste_bouwlaag
    - aantal_bouwlagen should be filled when both hoogste_bouwlaag and
      laagste_bouwlaag are filled

    Both checks are logged as warnings.

    :param entity: the pand entity to validate
    :return: None
    """
    lowest = entity.get('laagste_bouwlaag')
    highest = entity.get('hoogste_bouwlaag')
    aantal_bouwlagen = entity.get('aantal_bouwlagen')

    range_known = lowest is not None and highest is not None

    counted_bouwlagen = None
    if range_known:
        # One extra bouwlaag is counted when the lowest bouwlaag is below 1
        ground_floor = 1 if lowest < 1 else 0
        counted_bouwlagen = (highest + ground_floor) - lowest

    # aantal_bouwlagen should match the highest and lowest value
    if aantal_bouwlagen and counted_bouwlagen and aantal_bouwlagen != counted_bouwlagen:
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_aantal_bouwlagen_should_match, entity, self.source_id,
                  "aantal_bouwlagen",
                  compared_to="hoogste_bouwlaag and laagste_bouwlaag combined",
                  compared_to_value=counted_bouwlagen))

    # aantal_bouwlagen should be filled when the lowest and highest bouwlaag are known
    if not aantal_bouwlagen and range_known:
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_aantal_bouwlagen_not_filled, entity, self.source_id,
                  "aantal_bouwlagen"))
def _check_gebruiksdoelen_duplicates(self, entity: dict, gebruiksdoelen: list[str]):
    """
    Log a warning when the same gebruiksdoel occurs more than once

    :param entity: the entity that the gebruiksdoelen belong to
    :param gebruiksdoelen: the gebruiksdoel values of the entity
    :return: None
    """
    values = list(gebruiksdoelen)

    # A set drops duplicates, so a smaller set means at least one value is repeated
    if len(set(values)) < len(values):
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_duplicates, entity, self.source_id, 'gebruiksdoel'))
def test_get_value(self):
    """_get_value returns None and simple values unchanged, and a datetime as a string."""
    entity = {'id': 'any id', 'attr': 'any attr'}
    issue = Issue({'id': 'any_check'}, entity, 'id', 'attr')

    entity['x'] = None
    self.assertIsNone(issue._get_value(entity, 'x'))

    # Simple values are returned as is
    for simple_value in (1, True, "s", 2.0):
        entity['x'] = simple_value
        self.assertEqual(issue._get_value(entity, 'x'), simple_value)

    # A datetime is returned as a string
    entity['x'] = datetime.datetime.now()
    self.assertIsInstance(issue._get_value(entity, 'x'), str)
def test_log_issue_no_entity(self):
    """An issue whose entity has no id is not added to the logger."""
    # The entity lacks the 'id' attribute, so the issue is not linked to an entity
    entity_without_id = {'attr': 'any attr'}
    issue = Issue({'id': 'any_check', 'msg': 'any msg'}, entity_without_id, 'id', 'attr')

    mock_logger = MagicMock()
    mock_logger.get_name.return_value = "any name"

    log_issue(mock_logger, QA_LEVEL.INFO, issue)

    mock_logger.add_issue.assert_not_called()
def test_log_issue(self):
    """log_issue adds an issue with an entity id, and only logs an issue without one."""
    def make_issue(source):
        return Issue({'id': 'any_check', 'msg': 'any msg'}, source, 'id', 'attr')

    mock_logger = MagicMock()
    mock_logger.get_name.return_value = "any name"

    # Entity with an id: the issue is added, no data message is logged
    log_issue(mock_logger, QA_LEVEL.INFO, make_issue({'id': 'any id', 'attr': 'any attr'}))
    mock_logger.add_issue.assert_called()
    mock_logger.data_info.assert_not_called()

    # Entity without an id: the issue is not added, a data message is logged instead
    mock_logger.reset_mock()
    log_issue(mock_logger, QA_LEVEL.INFO, make_issue({}))
    mock_logger.add_issue.assert_not_called()
    mock_logger.data_info.assert_called()
def _check_gebruiksdoelen_exist(self, entity: dict, gebruiksdoelen: list[str]):
    """
    Log a single warning when any gebruiksdoel is not in VALID_GEBRUIKSDOEL_DOMAIN

    :param entity: the entity that the gebruiksdoelen belong to
    :param gebruiksdoelen: the gebruiksdoel values of the entity
    :return: None
    """
    if all(gebruiksdoel in VALID_GEBRUIKSDOEL_DOMAIN for gebruiksdoel in gebruiksdoelen):
        return

    # One issue is enough, the issue is logged for the gebruiksdoel attribute as a whole
    log_issue(
        logger, QA_LEVEL.WARNING,
        Issue(QA_CHECK.Value_gebruiksdoel_in_domain, entity, self.source_id, 'gebruiksdoel'))
def test_get_explanation(self):
    """get_explanation prefers an explicit explanation over the compared-to data."""
    source = {'id': 'any id', 'attr': 'any attr'}
    issue = Issue({'id': 'any_check'}, source, 'id', 'attr')

    # Nothing to explain
    self.assertIsNone(issue.get_explanation())

    # Compared-to data is used as explanation
    issue.compared_to, issue.compared_to_value = 'to', 'value'
    self.assertEqual(issue.get_explanation(), 'to = value')

    # An explicit explanation takes precedence
    issue.explanation = 'explanation'
    self.assertEqual(issue.get_explanation(), 'explanation')
def _check_aantal_eenheden_complex(self, entity):
    """
    Check aantal_eenheden_complex against the gebruiksdoel_plus descriptions

    The omschrijving of gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie
    is searched (case-insensitive) for the word 'complex'. A warning is logged when:
    - aantal_eenheden_complex is filled (not None) and none of the descriptions contains 'complex'
    - one of the descriptions contains 'complex' and aantal_eenheden_complex is empty

    A missing or empty (None) attribute or omschrijving counts as an empty description.

    :param entity: the entity to check
    :return: None
    """
    aantal_eenheden_complex = entity.get('aantal_eenheden_complex')

    check_attributes = ['gebruiksdoel_woonfunctie', 'gebruiksdoel_gezondheidszorgfunctie']
    # The attribute itself may be None, fall back to an empty dict before reading omschrijving
    check_values = [(entity.get(attr) or {}).get('omschrijving') or '' for attr in check_attributes]

    mentions_complex = any('complex' in value.lower() for value in check_values)
    compared_to = 'gebruiksdoel_woonfunctie and gebruiksdoel_gezondheidszorgfunctie'

    # If aantal_eenheden_complex is filled and complex not in the check values log a data warning
    if aantal_eenheden_complex is not None and not mentions_complex:
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_aantal_eenheden_complex_should_be_empty, entity, self.source_id,
                  'aantal_eenheden_complex',
                  compared_to=compared_to,
                  compared_to_value=', '.join(check_values)))

    # If complex in one of the check values, but aantal_eenheden_complex is not filled, log a data warning
    if mentions_complex and not aantal_eenheden_complex:
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_aantal_eenheden_complex_should_be_filled, entity, self.source_id,
                  'aantal_eenheden_complex',
                  compared_to=compared_to,
                  compared_to_value=', '.join(check_values)))
def enrich_nummeraanduiding(self, nummeraanduiding):
    """
    Reduce a multi-valued ligt_in_bag_woonplaats to its last value

    When the value contains ';' separated values, only the last value is kept.
    A warning is logged the first time this happens;
    self.multiple_values_logged prevents any further warnings.

    :param nummeraanduiding: the nummeraanduiding entity, modified in place
    :return: None
    """
    attribute = 'ligt_in_bag_woonplaats'

    bronwaarde = nummeraanduiding.get(attribute)
    if not bronwaarde or ';' not in bronwaarde:
        # Nothing to enrich
        return

    # Use the last of the multiple values
    nummeraanduiding[attribute] = bronwaarde.rsplit(';', 1)[-1]

    if self.multiple_values_logged:
        return

    log_issue(
        logger, QA_LEVEL.WARNING,
        Issue(QA_CHECK.Value_1_1_reference, nummeraanduiding, None, 'ligt_in_bag_woonplaats'))
    self.multiple_values_logged = True
def _attr_check(self, check, attr, entity):
    """
    Check that the (nested) attribute is available in the entity

    The attribute reference is split into keys that are followed one level
    at a time. A missing key is logged at the level of the check; when that
    level is fatal, self.fatal is set.

    :param check: the check, its "level" is used to log a failure
    :param attr: the (nested) attribute reference
    :param entity: the entity to inspect
    :return: True if every key of the reference is found, False otherwise
    """
    level = check["level"]

    node = entity
    for key in split_field_reference(attr):
        if key not in node:
            # If a fatal check has failed, mark the validation as fatal
            if level == QA_LEVEL.FATAL:
                self.fatal = True
            log_issue(logger, level, Issue(QA_CHECK.Attribute_exists, entity, self.entity_id, attr))
            return False
        # Descend to the next level
        node = node[key]

    return True
def validate_bouwblok(self, entity):
    """
    Validate bouwblok

    Checks that are being performed:
    - begin_geldigheid can not be in the future (not fatal, logged as a warning)

    :param entity: the bouwblok entity to validate
    :return: None
    """
    # Current UTC time as a naive datetime, the same value that the deprecated
    # datetime.utcnow() returned.
    # NOTE(review): this assumes begin_geldigheid is a naive datetime - confirm against the importer
    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # begin_geldigheid can not be in the future
    if entity[FIELD.START_VALIDITY] > utc_now:
        log_issue(
            logger, QA_LEVEL.WARNING,
            Issue(QA_CHECK.Value_not_in_future, entity, self.source_id, FIELD.START_VALIDITY))
def _qa_check(self, check, attr, entity):  # noqa: C901
    """
    Run a QA check on the (nested) attribute of an entity

    The validate function is selected by the type of the check. A failing
    check is logged at the level of the check; when that level is fatal,
    self.fatal is set.

    :param check: the check, its "level" and 'type' are used
    :param attr: the (nested) attribute reference
    :param entity: the entity that holds the value to check
    :return: True if the value passes the check, False otherwise
    """
    level = check["level"]

    value = get_nested_item(entity, *split_field_reference(attr))
    validator = self.validate_functions.get(check['type'])

    if validator(check, value):
        return True

    # The value doesn't pass the qa check.
    # If a fatal check has failed, mark the validation as fatal
    if level == QA_LEVEL.FATAL:
        self.fatal = True

    log_issue(logger, level, Issue(check, entity, self.entity_id, attr))
    return False
def date_comparison_issue(self, entity, date_field, compare_date_field):
    """
    Log a date comparison issue

    Logs a warning for a failed date comparison between 2 fields

    :param entity: the entity which is compared
    :param date_field: field name of the date
    :param compare_date_field: field name of the compared date
    :return: None
    """
    issue = Issue(QA_CHECK.Value_not_after, entity, self.source_id, date_field,
                  compared_to=compare_date_field)
    log_issue(logger, QA_LEVEL.WARNING, issue)
def test_get_id(self):
    """The unique id combines check id, attribute and entity id, plus the seqnr when present."""
    def unique_id_of(source):
        return Issue({'id': 'any_check'}, source, 'id', 'attr').get_unique_id()

    without_seqnr = {'id': 'any id', 'attr': 'any attr'}
    self.assertEqual(unique_id_of(without_seqnr), 'any_check_attr_any id')

    with_seqnr = {'id': 'any id', 'attr': 'any attr', FIELD.SEQNR: 1}
    self.assertEqual(unique_id_of(with_seqnr), 'any_check_attr_any id_1')
def test_log_args(self):
    """log_args returns the message id and the data of the issue, extended with any kwargs."""
    entity = {'id': 'any id', 'attr': 'any attr'}
    check = {'id': 'any_check', 'msg': 'any msg'}

    # Data that is reported for every issue on this entity
    base_data = {'id': 'any id', FIELD.SEQNR: None, 'attr': 'any attr'}

    issue = Issue(check, entity, 'id', 'attr')
    self.assertEqual(issue.log_args(), {
        'id': 'attr: any msg',
        'data': dict(base_data),
    })

    # The compared-to attribute is added to both the message id and the data
    issue = Issue(check, entity, 'id', 'attr', 'any compared to', 'any compare to value')
    compared_data = {**base_data, 'any compared to': 'any compare to value'}
    self.assertEqual(issue.log_args(), {
        'id': 'attr: any msg any compared to',
        'data': dict(compared_data),
    })

    # Keyword arguments end up in the data
    self.assertEqual(issue.log_args(any_key="any value"), {
        'id': 'attr: any msg any compared to',
        'data': {**compared_data, 'any_key': 'any value'},
    })
def test_msg(self):
    """msg combines attribute and check message, followed by the compared-to attribute if any."""
    entity = {'id': 'any id', 'attr': 'any attr'}
    check = {'id': 'any_check', 'msg': 'any msg'}

    plain_issue = Issue(check, entity, 'id', 'attr')
    self.assertEqual(plain_issue.msg(), "attr: any msg")

    compared_issue = Issue(check, entity, 'id', 'attr', 'any compared to')
    self.assertEqual(compared_issue.msg(), "attr: any msg any compared to")
def test_join_issue(self):
    """Joining issues combines distinct values and refuses issues of another entity."""
    def make_issue(source):
        return Issue({'id': 'any_check'}, source, 'id', 'attr')

    entity = {'id': 'any id', 'attr': 5}
    other_entity = {'id': 'any id', 'attr': 6}

    # Different values are combined
    issue = make_issue(entity)
    issue.join_issue(make_issue(other_entity))
    self.assertEqual(issue.value, '5, 6')

    # The same value will not be stored twice
    issue = make_issue(entity)
    same_value_issue = make_issue(entity)
    issue.join_issue(same_value_issue)
    self.assertEqual(issue.value, 5)

    # Issues for different entities can not be joined
    same_value_issue.entity_id = 'other id'
    with self.assertRaises(IssueException):
        issue.join_issue(same_value_issue)