def assertRowErrors(self, row_values, errors_expected, schema_values=None, columns=None):
    """Validate one HXL row and check how many errors get reported.

    Uses self.DEFAULT_SCHEMA when schema_values is None and
    self.DEFAULT_COLUMNS when columns is None. The validation result must
    be truthy exactly when errors_expected is 0, and the number of errors
    passed to the callback must equal errors_expected.
    """
    reported = []

    def record(error):
        reported.append(error)

    # Only touch the class default when no explicit schema was given
    schema_source = self.DEFAULT_SCHEMA if schema_values is None else schema_values
    schema = hxl.schema(hxl.data(schema_source), callback=record)

    tags = self.DEFAULT_COLUMNS if columns is None else columns
    parsed_columns = [Column.parse(tag) for tag in tags]
    row = Row(values=row_values, columns=parsed_columns)

    schema.start()

    row_ok = schema.validate_row(row)
    if errors_expected == 0:
        self.assertTrue(row_ok)
    else:
        self.assertFalse(row_ok)
    self.assertEqual(len(reported), errors_expected)
def assertRowErrors(self, row_values, errors_expected, schema_values=None, columns=None):
    """Build a HXL row, validate it, and compare the error count.

    schema_values and columns fall back to self.DEFAULT_SCHEMA and
    self.DEFAULT_COLUMNS respectively when left as None.
    """
    collected = []

    def on_error(error):
        collected.append(error)

    # Substitute the class-level defaults for anything not supplied
    if schema_values is None:
        schema_values = self.DEFAULT_SCHEMA
    schema = hxl.schema(hxl.data(schema_values), callback=on_error)

    if columns is None:
        columns = self.DEFAULT_COLUMNS
    row = Row(
        values=row_values,
        columns=[Column.parse(tag) for tag in columns],
    )

    schema.start()

    # A clean row must validate; a row with expected errors must not
    check_result = self.assertTrue if errors_expected == 0 else self.assertFalse
    check_result(schema.validate_row(row))
    self.assertEqual(len(collected), errors_expected)
def validate(self, schema=None, callback=None):
    """
    Run schema validation over the current dataset.
    @param schema (optional) the pre-compiled schema, schema filename, URL, file object, etc. Defaults to a built-in schema.
    @param callback (optional) a function to call with each error or warning. Defaults to collecting errors in an array and returning them.
    @return the result of calling validate() on the schema with this dataset
    """
    compiled_schema = hxl.schema(schema, callback)
    return compiled_schema.validate(self)
def test_outliers(self):
    """Validation with a #valid_value+outliers rule should fail, reporting only the two extreme values."""
    extremes = ['1', '1000000']

    # Header and the two extreme values, followed by ten repeats of ordinary values
    raw_data = [['#affected'], ['1'], ['1000000']]
    for _ in range(10):
        raw_data.extend([['100'], ['200'], ['800']])

    callback_fired = False

    def on_error(e):
        nonlocal callback_fired
        callback_fired = True
        self.assertTrue(e.value in extremes)

    schema_rows = [
        ['#valid_tag', '#valid_value+outliers'],
        ['#affected', 'true'],
    ]
    schema = hxl.schema(schema_rows, callback=on_error)

    dataset = hxl.data(raw_data)
    self.assertFalse(schema.validate(dataset))
    # Make sure the assertions inside the callback actually ran
    self.assertTrue(callback_fired)
def test_truthy(self):
    """Check the truthy-schema.json fixture: the #sector sample must fail and the #adm2+code sample must pass."""
    schema_path = resolve_path('files/test_validation/truthy-schema.json')
    schema_source = hxl.data(schema_path, allow_local=True)
    schema = hxl.schema(schema_source)

    failing_rows = [['#sector'], ['Health']]
    failing_result = schema.validate(hxl.data(failing_rows))
    self.assertFalse(failing_result)

    passing_rows = [['#adm2+code'], ['xxx']]
    passing_result = schema.validate(hxl.data(passing_rows))
    self.assertTrue(passing_result)
def test_taxonomy_bad(self):
    """The bad taxonomy data should produce one regular issue and no external issues."""
    schema = hxl.schema(SCHEMA_TAXONOMY)
    dataset = hxl.data(DATA_TAXONOMY_BAD)
    report = hxl.validate(dataset, schema)

    self.assertFalse(report['is_valid'])

    stats = report['stats']
    self.assertEqual(1, stats['error'])
    self.assertEqual(0, stats['external'])

    # The issue lists must agree with the summary counts
    self.assertEqual(1, len(report['issues']))
    self.assertEqual(0, len(report['external_issues']))
def test_taxonomy_missing(self):
    """A missing external taxonomy is reported as one external issue while the data stays valid."""
    schema = hxl.schema(SCHEMA_TAXONOMY_MISSING)
    dataset = hxl.data(DATA_TAXONOMY_GOOD)
    report = hxl.validate(dataset, schema)

    self.assertTrue(report['is_valid'])
    self.assertTrue('external_issues' in report)

    stats = report['stats']
    self.assertEqual(0, stats['error'])
    self.assertEqual(1, stats['external'])

    # The issue lists must agree with the summary counts
    self.assertEqual(0, len(report['issues']))
    self.assertEqual(1, len(report['external_issues']))
def test_truthy(self):
    """Load truthy-schema.json and confirm it rejects the #sector sample and accepts the #adm2+code sample."""
    schema = hxl.schema(
        hxl.data(
            resolve_path('files/test_validation/truthy-schema.json'),
            allow_local=True,
        )
    )

    # Sample expected to fail validation
    rejected = hxl.data([
        ['#sector'],
        ['Health'],
    ])
    self.assertFalse(schema.validate(rejected))

    # Sample expected to pass validation
    accepted = hxl.data([
        ['#adm2+code'],
        ['xxx'],
    ])
    self.assertTrue(schema.validate(accepted))
def test_consistent_datatype(self):
    """A #valid_datatype+consistent rule should fail the data and report 'xxx' as the bad value."""

    def check_error(e):
        # expect that 'xxx' will be the bad value
        self.assertEqual('xxx', e.value)

    schema_rows = [
        ['#valid_tag', '#valid_datatype+consistent'],
        ['#affected', 'true'],
    ]
    schema = hxl.schema(schema_rows, callback=check_error)

    data_rows = [['#affected'], ['100'], ['xxx'], ['200'], ['800']]
    dataset = hxl.data(data_rows)

    self.assertFalse(schema.validate(dataset))
def test_default_schema(self):
    """Validate a small dataset against the built-in schema and expect three errors."""
    rows = [
        ['#affected', '#date'],
        ['100', '2018-01-01'],  # OK
        ['200', 'xxx'],  # bad date
        ['xxx', '2018-03-01'],  # bad number
        ['100', ' 2018-04-01 '],  # extra whitespace
    ]

    error_count = 0

    def count_error(e):
        nonlocal error_count
        error_count += 1

    # No schema argument, so the built-in schema is used
    schema = hxl.schema(callback=count_error)
    outcome = schema.validate(hxl.data(rows))

    self.assertFalse(outcome)
    self.assertEqual(3, error_count)
def assertColumnErrors(self, column_values, errors_expected, schema_values):
    """Validate a dataset built from column_values and check how many errors are reported.

    The dataset-level validation result must be truthy exactly when
    errors_expected is 0, and the callback must have been invoked
    errors_expected times.
    """
    reported = []

    def record(error):
        reported.append(error)

    schema = hxl.schema(schema_values, callback=record)
    dataset = make_dataset(column_values)

    schema.start()

    dataset_ok = schema.validate_dataset(dataset)
    if errors_expected == 0:
        self.assertTrue(dataset_ok)
    else:
        self.assertFalse(dataset_ok)
    self.assertEqual(len(reported), errors_expected)
def assertDatasetErrors(self, dataset, errors_expected, schema=None):
    """Validate a whole dataset and check the number of reported errors.

    Uses self.SCHEMA when no schema is supplied. Validation must succeed
    exactly when errors_expected is 0.
    """
    reported = []

    def record(error):
        reported.append(error)

    schema_source = self.SCHEMA if schema is None else schema
    compiled = hxl.schema(schema_source, record)

    # Choose the assertion up front; the dataset is wrapped and validated once
    check_result = self.assertTrue if errors_expected == 0 else self.assertFalse
    check_result(compiled.validate(hxl.data(dataset)))

    self.assertEqual(len(reported), errors_expected)
def test_suggested_value_correlation_key(self):
    """Complex test: a correlation rule should suggest 'yy' for the mismatched row."""

    def check_suggestion(e):
        self.assertEqual('yy', e.suggested_value)

    schema_rows = [
        ['#valid_tag', '#valid_correlation'],
        ['#foo', '#bar'],
    ]
    schema = hxl.schema(schema_rows, check_suggestion)

    data_rows = [
        ['#foo', '#bar'],
        ['yy', 'yyy'],
        ['yy', 'yyy'],
        ['xx', 'xxx'],
        ['xx', 'xxx'],
        ['xx', 'yyy'],
    ]
    dataset = hxl.data(data_rows)

    self.assertFalse(schema.validate(dataset))
def test_suggested_value_correlation_key(self):
    """Complex test: check the value suggested from the correlation key."""

    def on_error(e):
        # Every reported error is expected to suggest 'yy'
        self.assertEqual('yy', e.suggested_value)

    schema = hxl.schema(
        [
            ['#valid_tag', '#valid_correlation'],
            ['#foo', '#bar'],
        ],
        on_error,
    )

    header = ['#foo', '#bar']
    body = [
        ['yy', 'yyy'],
        ['yy', 'yyy'],
        ['xx', 'xxx'],
        ['xx', 'xxx'],
        ['xx', 'yyy'],
    ]
    outcome = schema.validate(hxl.data([header] + body))

    self.assertFalse(outcome)
def do_validate(source, schema_url=None, severity_level=None):
    """Validate a source and return its errors grouped by rule.

    @param source the data source; must provide row_counter()
    @param schema_url (optional) passed straight to hxl.schema()
    @param severity_level (optional) key into SEVERITY_LEVELS giving the
        minimum severity to keep; an unrecognised or missing level maps to
        a threshold of -1
    @return False if the source had no rows; otherwise a dict mapping each
        rule hash (from make_rule_hash) to the list of errors reported for
        that rule (empty if nothing met the threshold)
    """
    min_severity = SEVERITY_LEVELS.get(severity_level, -1)
    errors = {}

    def callback(error):
        # Rules with an unrecognised severity are treated as level 0
        if SEVERITY_LEVELS.get(error.rule.severity, 0) >= min_severity:
            errors.setdefault(make_rule_hash(error.rule), []).append(error)

    schema = hxl.schema(schema_url, callback)
    counter = source.row_counter()

    # The return value is not needed: errors arrive through the callback and
    # the row count is read from the counter once validation has consumed it.
    schema.validate(counter)

    if counter.row_count == 0:
        return False
    return errors
def test_default_schema(self):
    """Test the built-in schema: the sample dataset must fail with exactly three errors."""
    DATASET = [
        ['#affected', '#date'],
        ['100', '2018-01-01'],  # OK
        ['200', 'xxx'],  # bad date
        ['xxx', '2018-03-01'],  # bad number
        ['100', ' 2018-04-01 '],  # extra whitespace
    ]

    # Keep every reported error so the total can be checked afterwards
    seen = []

    def callback(e):
        seen.append(e)

    builtin_schema = hxl.schema(callback=callback)
    dataset = hxl.data(DATASET)

    self.assertFalse(builtin_schema.validate(dataset))
    self.assertEqual(3, len(seen))
def test_consistent_datatype(self):
    """The lone non-numeric value in #affected should be flagged by the +consistent datatype rule."""

    def on_error(e):
        # expect that 'xxx' will be the bad value
        self.assertEqual('xxx', e.value)

    schema = hxl.schema(
        [
            ['#valid_tag', '#valid_datatype+consistent'],
            ['#affected', 'true'],
        ],
        callback=on_error,
    )

    header = ['#affected']
    values = [['100'], ['xxx'], ['200'], ['800']]
    outcome = schema.validate(hxl.data([header] + values))

    self.assertFalse(outcome)
def test_outliers(self):
    """Check that the +outliers value rule fails the data and only reports '1' and '1000000'."""
    BAD_VALUES = ['1', '1000000']

    # Header plus the two outliers first, then ten batches of ordinary values
    raw_data = [['#affected'], ['1'], ['1000000']]
    batch_count = 10
    while batch_count > 0:
        raw_data += [['100'], ['200'], ['800']]
        batch_count -= 1

    seen_callback = False

    def callback(e):
        nonlocal seen_callback
        seen_callback = True
        self.assertTrue(e.value in BAD_VALUES)

    rule_header = ['#valid_tag', '#valid_value+outliers']
    rule_row = ['#affected', 'true']
    schema = hxl.schema([rule_header, rule_row], callback=callback)

    outcome = schema.validate(hxl.data(raw_data))
    self.assertFalse(outcome)
    # Guard against the callback never having been called
    self.assertTrue(seen_callback)
def test_load_bad(self):
    """DATA_BAD must not validate against SCHEMA_BASIC."""
    schema = hxl.schema(SCHEMA_BASIC)
    dataset = hxl.data(DATA_BAD)
    outcome = schema.validate(dataset)
    self.assertFalse(outcome)
def test_load_default(self):
    """The schema loaded with no arguments must have rules and accept DATA_GOOD."""
    schema = hxl.schema()

    rule_count = len(schema.rules)
    self.assertTrue(rule_count > 0)

    dataset = hxl.data(DATA_GOOD)
    self.assertTrue(schema.validate(dataset))
def test_load_good(self):
    """DATA_GOOD must validate against SCHEMA_BASIC."""
    schema = hxl.schema(SCHEMA_BASIC)
    dataset = hxl.data(DATA_GOOD)
    outcome = schema.validate(dataset)
    self.assertTrue(outcome)
def test_taxonomy_all(self):
    """The good taxonomy data must validate against SCHEMA_TAXONOMY_ALL with no errors counted."""
    schema = hxl.schema(SCHEMA_TAXONOMY_ALL)
    dataset = hxl.data(DATA_TAXONOMY_GOOD)
    report = hxl.validate(dataset, schema)

    self.assertTrue(report['is_valid'])

    error_total = report['stats']['error']
    self.assertEqual(0, error_total)
def test_taxonomy_good(self):
    """DATA_TAXONOMY_GOOD must validate against SCHEMA_TAXONOMY."""
    schema = hxl.schema(SCHEMA_TAXONOMY)
    dataset = hxl.data(DATA_TAXONOMY_GOOD)
    outcome = schema.validate(dataset)
    self.assertTrue(outcome)