def assertRowErrors(self, row_values, errors_expected, schema_values=None, columns=None):
        """Validate a single HXL row and check how many errors are reported.

        The schema is built from ``schema_values`` (``self.DEFAULT_SCHEMA``
        when omitted) and the row's columns are parsed from ``columns``
        (``self.DEFAULT_COLUMNS`` when omitted). ``validate_row`` must be
        truthy exactly when ``errors_expected`` is zero, and the schema
        callback must have received ``errors_expected`` errors.
        """
        reported = []

        schema_source = (self.DEFAULT_SCHEMA
                         if schema_values is None else schema_values)
        schema = hxl.schema(hxl.data(schema_source), callback=reported.append)

        hashtags = self.DEFAULT_COLUMNS if columns is None else columns
        row = Row(values=row_values,
                  columns=[Column.parse(hashtag) for hashtag in hashtags])

        schema.start()

        # A clean row must validate; any expected error must make it fail.
        expect = self.assertTrue if errors_expected == 0 else self.assertFalse
        expect(schema.validate_row(row))
        self.assertEqual(len(reported), errors_expected)
    def assertRowErrors(
            self,
            row_values,
            errors_expected,
            schema_values=None,
            columns=None):
        """Build one HXL row, validate it, and count the errors raised.

        Falls back to ``self.DEFAULT_SCHEMA`` and ``self.DEFAULT_COLUMNS``
        when ``schema_values`` or ``columns`` are not supplied. Asserts that
        ``validate_row`` is truthy only when no errors are expected, and that
        the number of errors sent to the callback equals ``errors_expected``.
        """
        collected = []

        if schema_values is None:
            schema_values = self.DEFAULT_SCHEMA
        schema = hxl.schema(hxl.data(schema_values), callback=collected.append)

        tags = self.DEFAULT_COLUMNS if columns is None else columns
        parsed_columns = [Column.parse(tag) for tag in tags]
        row = Row(values=row_values, columns=parsed_columns)

        schema.start()

        row_is_valid = schema.validate_row(row)
        if errors_expected == 0:
            self.assertTrue(row_is_valid)
        else:
            self.assertFalse(row_is_valid)
        self.assertEqual(len(collected), errors_expected)
示例#3
0
 def validate(self, schema=None, callback=None):
     """Validate the current dataset against a HXL schema.

     Args:
         schema: (optional) the pre-compiled schema, schema filename, URL,
             file object, etc. Defaults to a built-in schema.
         callback: (optional) a function to call with each error or
             warning. Defaults to collecting errors in an array and
             returning them.

     Returns:
         Whatever the schema's ``validate`` method returns for this dataset.
     """
     compiled_schema = hxl.schema(schema, callback)
     return compiled_schema.validate(self)
    def test_outliers(self):
        """Check that the outlier rule only reports the two extreme values.

        The dataset holds '1' and '1000000' plus ten repetitions of
        100/200/800. Every error passed to the callback must carry one of
        the two extreme values, the callback must fire at least once, and
        validation as a whole must fail.
        """
        BAD_VALUES = ['1', '1000000']

        raw_data = [
            ['#affected'],
            ['1'],
            ['1000000']
        ]

        # Pad the column with ordinary values; the loop index is not needed.
        for _ in range(10):
            raw_data.extend([
                ['100'],
                ['200'],
                ['800']
            ])

        seen_callback = False

        def callback(e):
            nonlocal seen_callback
            seen_callback = True
            # assertIn names the unexpected value when the check fails.
            self.assertIn(e.value, BAD_VALUES)

        schema = hxl.schema([
            ['#valid_tag', '#valid_value+outliers'],
            ['#affected', 'true']
        ], callback=callback)

        data = hxl.data(raw_data)

        self.assertFalse(schema.validate(data))
        self.assertTrue(seen_callback)
 def test_truthy(self):
     """Validate two small datasets against the truthy-schema.json schema.

     The ``#sector`` dataset is expected to fail validation and the
     ``#adm2+code`` dataset is expected to pass.
     """
     schema_path = resolve_path('files/test_validation/truthy-schema.json')
     schema = hxl.schema(hxl.data(schema_path, allow_local=True))

     failing_rows = [
         ['#sector'],
         ['Health'],
     ]
     self.assertFalse(schema.validate(hxl.data(failing_rows)))

     passing_rows = [
         ['#adm2+code'],
         ['xxx'],
     ]
     self.assertTrue(schema.validate(hxl.data(passing_rows)))
 def test_taxonomy_bad(self):
     """Check the validation report for data that breaks the taxonomy schema.

     The report must be invalid, with exactly one regular issue and no
     external issues, both in the stats and in the issue lists.
     """
     taxonomy_schema = hxl.schema(SCHEMA_TAXONOMY)
     dataset = hxl.data(DATA_TAXONOMY_BAD)
     report = hxl.validate(dataset, taxonomy_schema)

     self.assertFalse(report['is_valid'])

     # Counters in the summary
     self.assertEqual(1, report['stats']['error'])
     self.assertEqual(0, report['stats']['external'])

     # Lengths of the detailed lists must agree with the counters
     self.assertEqual(1, len(report['issues']))
     self.assertEqual(0, len(report['external_issues']))
 def test_taxonomy_bad(self):
     """Validate DATA_TAXONOMY_BAD against SCHEMA_TAXONOMY and inspect the report.

     Expects ``is_valid`` to be falsy, one error and zero external problems
     in ``stats``, and matching lengths for the two issue lists.
     """
     compiled = hxl.schema(SCHEMA_TAXONOMY)
     outcome = hxl.validate(hxl.data(DATA_TAXONOMY_BAD), compiled)

     self.assertFalse(outcome['is_valid'])
     self.assertEqual(1, outcome['stats']['error'])
     self.assertEqual(0, outcome['stats']['external'])

     issue_count = len(outcome['issues'])
     self.assertEqual(1, issue_count)
     external_count = len(outcome['external_issues'])
     self.assertEqual(0, external_count)
 def test_taxonomy_missing(self):
     """Handle a missing external taxonomy.

     Validating DATA_TAXONOMY_GOOD against SCHEMA_TAXONOMY_MISSING must
     still report the data as valid, with no regular errors and exactly
     one external issue.
     """
     schema = hxl.schema(SCHEMA_TAXONOMY_MISSING)
     result = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD), schema)
     self.assertTrue(result['is_valid'])
     # assertIn shows the report contents if the key is absent.
     self.assertIn('external_issues', result)
     self.assertEqual(0, result['stats']['error'])
     self.assertEqual(1, result['stats']['external'])
     self.assertEqual(0, len(result['issues']))
     self.assertEqual(1, len(result['external_issues']))
 def test_taxonomy_missing(self):
     """Handle a missing external taxonomy.

     The report for DATA_TAXONOMY_GOOD under SCHEMA_TAXONOMY_MISSING must
     be valid, carry an ``external_issues`` entry with one item, and
     contain no regular errors or issues.
     """
     schema = hxl.schema(SCHEMA_TAXONOMY_MISSING)
     result = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD), schema)

     self.assertTrue(result['is_valid'])
     # Membership assertion gives a descriptive message when it fails.
     self.assertIn('external_issues', result)

     self.assertEqual(0, result['stats']['error'])
     self.assertEqual(1, result['stats']['external'])
     self.assertEqual(0, len(result['issues']))
     self.assertEqual(1, len(result['external_issues']))
 def test_truthy(self):
     """Run the schema from truthy-schema.json over a failing and a passing dataset.

     A dataset tagged ``#sector`` must be rejected; one tagged
     ``#adm2+code`` must be accepted.
     """
     schema_source = hxl.data(
         resolve_path('files/test_validation/truthy-schema.json'),
         allow_local=True)
     schema = hxl.schema(schema_source)

     rejected = [['#sector'], ['Health']]
     self.assertFalse(schema.validate(hxl.data(rejected)))

     accepted = [['#adm2+code'], ['xxx']]
     self.assertTrue(schema.validate(hxl.data(accepted)))
    def test_consistent_datatype(self):
        """Check that the consistent-datatype rule reports 'xxx' and fails validation."""
        def expect_bad_value(error):
            # 'xxx' is the only value the callback should ever be given
            self.assertEqual('xxx', error.value)

        schema_rows = [
            ['#valid_tag', '#valid_datatype+consistent'],
            ['#affected', 'true'],
        ]
        schema = hxl.schema(schema_rows, callback=expect_bad_value)

        data_rows = [
            ['#affected'],
            ['100'],
            ['xxx'],
            ['200'],
            ['800'],
        ]
        data = hxl.data(data_rows)

        self.assertFalse(schema.validate(data))
 def test_default_schema(self):
     """Test the built-in schema.

     Validation of the sample dataset must fail and the callback must be
     invoked exactly three times.
     """
     rows = [
         ['#affected', '#date'],
         ['100', '2018-01-01'],   # OK
         ['200', 'xxx'],          # bad date
         ['xxx', '2018-03-01'],   # bad number
         ['100', ' 2018-04-01 '], # extra whitespace
     ]
     # Collect every reported error so they can be counted afterwards.
     reported = []
     schema = hxl.schema(callback=reported.append)
     self.assertFalse(schema.validate(hxl.data(rows)))
     self.assertEqual(3, len(reported))
    def assertColumnErrors(
            self, column_values, errors_expected, schema_values):
        """Validate a dataset built from column values and count the errors.

        The dataset comes from ``make_dataset(column_values)`` and is checked
        with the schema's ``validate_dataset``. The result must be truthy
        exactly when ``errors_expected`` is zero, and the callback must have
        received ``errors_expected`` errors.
        """
        reported = []
        schema = hxl.schema(schema_values, callback=reported.append)
        dataset = make_dataset(column_values)

        schema.start()
        expect = self.assertTrue if errors_expected == 0 else self.assertFalse
        expect(schema.validate_dataset(dataset))
        self.assertEqual(len(reported), errors_expected)
    def assertDatasetErrors(self, dataset, errors_expected, schema=None):
        """Validate a whole dataset and check the number of errors reported.

        Uses ``self.SCHEMA`` when ``schema`` is omitted. Validation must be
        truthy exactly when ``errors_expected`` is zero, and the callback
        must have been given ``errors_expected`` errors.
        """
        reported = []

        schema_source = self.SCHEMA if schema is None else schema
        compiled = hxl.schema(schema_source, reported.append)

        expect = self.assertTrue if errors_expected == 0 else self.assertFalse
        expect(compiled.validate(hxl.data(dataset)))

        self.assertEqual(len(reported), errors_expected)
    def test_suggested_value_correlation_key(self):
        """Complex test: is a value suggested based on the correlation key?

        Every error reported must suggest 'yy', and validation must fail.
        """
        def expect_suggestion(error):
            self.assertEqual('yy', error.suggested_value)

        schema_rows = [
            ['#valid_tag', '#valid_correlation'],
            ['#foo', '#bar'],
        ]
        schema = hxl.schema(schema_rows, expect_suggestion)

        # NOTE(review): presumably the final row ('xx' paired with 'yyy') is
        # the one flagged - confirm against the correlation rule.
        rows = [
            ['#foo', '#bar'],
            ['yy', 'yyy'],
            ['yy', 'yyy'],
            ['xx', 'xxx'],
            ['xx', 'xxx'],
            ['xx', 'yyy'],
        ]
        self.assertFalse(schema.validate(hxl.data(rows)))
    def assertDatasetErrors(self, dataset, errors_expected, schema=None):
        """Run schema validation over ``dataset`` and count the errors raised.

        ``schema`` defaults to ``self.SCHEMA``. The validation result must be
        truthy only when no errors are expected, and the number of errors
        passed to the callback must equal ``errors_expected``.
        """
        collected = []

        if schema is None:
            schema = self.SCHEMA
        compiled_schema = hxl.schema(schema, collected.append)

        is_valid = compiled_schema.validate(hxl.data(dataset))
        if errors_expected == 0:
            self.assertTrue(is_valid)
        else:
            self.assertFalse(is_valid)

        self.assertEqual(len(collected), errors_expected)
    def assertColumnErrors(self, column_values, errors_expected, schema_values):
        """Set up a dataset from column values, validate it, and count errors.

        ``validate_dataset`` must return a truthy value only when
        ``errors_expected`` is zero; the callback must be invoked
        ``errors_expected`` times.
        """
        collected = []

        schema = hxl.schema(schema_values, callback=collected.append)
        dataset = make_dataset(column_values)

        schema.start()
        dataset_is_valid = schema.validate_dataset(dataset)
        if errors_expected == 0:
            self.assertTrue(dataset_is_valid)
        else:
            self.assertFalse(dataset_is_valid)
        self.assertEqual(len(collected), errors_expected)
 def test_suggested_value_correlation_key(self):
     """Complex test: check the value suggested from the correlation key.

     Each reported error must carry 'yy' as its suggested value, and the
     dataset as a whole must fail validation.
     """
     def check_suggestion(error):
         self.assertEqual('yy', error.suggested_value)

     schema = hxl.schema(
         [['#valid_tag', '#valid_correlation'], ['#foo', '#bar']],
         check_suggestion)
     rows = [
         ['#foo', '#bar'],
         ['yy', 'yyy'],
         ['yy', 'yyy'],
         ['xx', 'xxx'],
         ['xx', 'xxx'],
         ['xx', 'yyy'],
     ]
     data = hxl.data(rows)
     is_valid = schema.validate(data)
     self.assertFalse(is_valid)
示例#19
0
def do_validate(source, schema_url=None, severity_level=None):
    """Validate a source and collect the reported errors, grouped by rule.

    Args:
        source: dataset to validate; must provide ``row_counter()``.
        schema_url: schema specification handed to ``hxl.schema``.
        severity_level: key into ``SEVERITY_LEVELS``. Errors whose rule
            severity ranks below this level are dropped. A missing or
            unknown level falls back to a threshold of -1.

    Returns:
        ``False`` when the source produced no rows; otherwise a dict mapping
        each rule hash (from ``make_rule_hash``) to the list of errors
        reported for that rule.
    """
    min_severity = SEVERITY_LEVELS.get(severity_level, -1)
    errors = {}

    def callback(error):
        # Rules with an unrecognised severity are ranked as 0.
        if SEVERITY_LEVELS.get(error.rule.severity, 0) >= min_severity:
            errors.setdefault(make_rule_hash(error.rule), []).append(error)

    schema = hxl.schema(schema_url, callback)
    counter = source.row_counter()
    # Called for its side effects: errors arrive through the callback and
    # the counter's row_count is read below. The return value is not needed.
    schema.validate(counter)
    if counter.row_count == 0:
        return False
    return errors
    def test_default_schema(self):
        """Test the built-in schema.

        The sample dataset must fail validation and produce exactly three
        callback invocations.
        """
        sample_rows = [
            ['#affected', '#date'],
            ['100', '2018-01-01'],  # OK
            ['200', 'xxx'],  # bad date
            ['xxx', '2018-03-01'],  # bad number
            ['100', ' 2018-04-01 '],  # extra whitespace
        ]
        # Gather the errors in a list and count them at the end.
        seen = []

        default_schema = hxl.schema(callback=seen.append)
        is_valid = default_schema.validate(hxl.data(sample_rows))

        self.assertFalse(is_valid)
        self.assertEqual(3, len(seen))
示例#21
0
def do_validate(source, schema_url=None, severity_level=None):
    """Validate a source and return its errors grouped by rule hash.

    Args:
        source: dataset to validate; must provide ``row_counter()``.
        schema_url: schema specification passed to ``hxl.schema``.
        severity_level: key into ``SEVERITY_LEVELS`` giving the minimum
            severity to keep. Missing or unknown levels use a threshold
            of -1.

    Returns:
        ``False`` if no rows were counted; otherwise a dict whose keys are
        rule hashes (see ``make_rule_hash``) and whose values are lists of
        the errors reported for that rule.
    """
    min_severity = SEVERITY_LEVELS.get(severity_level, -1)
    errors = {}

    def callback(error):
        # A rule severity missing from SEVERITY_LEVELS is treated as 0.
        if SEVERITY_LEVELS.get(error.rule.severity, 0) >= min_severity:
            errors.setdefault(make_rule_hash(error.rule), []).append(error)

    schema = hxl.schema(schema_url, callback)
    counter = source.row_counter()

    # The validation result itself is unused; errors are delivered through
    # the callback and the row count is read from the counter afterwards.
    schema.validate(counter)

    if counter.row_count == 0:
        return False
    return errors
    def test_consistent_datatype(self):
        """The consistent-datatype rule must fail the dataset and report 'xxx'."""
        def check_value(error):
            # expect that 'xxx' will be the bad value
            self.assertEqual('xxx', error.value)

        schema = hxl.schema(
            [['#valid_tag', '#valid_datatype+consistent'],
             ['#affected', 'true']],
            callback=check_value)

        data = hxl.data([['#affected'], ['100'], ['xxx'], ['200'], ['800']])

        is_valid = schema.validate(data)
        self.assertFalse(is_valid)
    def test_outliers(self):
        """Check that the outlier rule only reports the two extreme values.

        '1' and '1000000' sit alongside ten repetitions of 100/200/800.
        Each error sent to the callback must carry one of the two extreme
        values, at least one error must be reported, and validation must
        fail.
        """
        BAD_VALUES = ['1', '1000000']

        raw_data = [['#affected'], ['1'], ['1000000']]

        # Pad the column with ordinary values; the loop index is not needed.
        for _ in range(10):
            raw_data.extend([['100'], ['200'], ['800']])

        seen_callback = False

        def callback(e):
            nonlocal seen_callback
            seen_callback = True
            # assertIn names the unexpected value when the check fails.
            self.assertIn(e.value, BAD_VALUES)

        schema = hxl.schema(
            [['#valid_tag', '#valid_value+outliers'], ['#affected', 'true']],
            callback=callback)

        data = hxl.data(raw_data)

        self.assertFalse(schema.validate(data))
        self.assertTrue(seen_callback)
示例#24
0
 def test_load_bad(self):
     """DATA_BAD must fail validation against SCHEMA_BASIC."""
     basic_schema = hxl.schema(SCHEMA_BASIC)
     dataset = hxl.data(DATA_BAD)
     self.assertFalse(basic_schema.validate(dataset))
示例#25
0
 def test_load_default(self):
     """The default schema must contain rules and accept DATA_GOOD."""
     schema = hxl.schema()
     # assertGreater reports the actual rule count if the check fails.
     self.assertGreater(len(schema.rules), 0)
     self.assertTrue(schema.validate(hxl.data(DATA_GOOD)))
示例#26
0
 def test_load_good(self):
     """DATA_GOOD must pass validation against SCHEMA_BASIC."""
     basic_schema = hxl.schema(SCHEMA_BASIC)
     dataset = hxl.data(DATA_GOOD)
     self.assertTrue(basic_schema.validate(dataset))
 def test_taxonomy_all(self):
     """DATA_TAXONOMY_GOOD must validate cleanly under SCHEMA_TAXONOMY_ALL."""
     taxonomy_schema = hxl.schema(SCHEMA_TAXONOMY_ALL)
     dataset = hxl.data(DATA_TAXONOMY_GOOD)
     report = hxl.validate(dataset, taxonomy_schema)

     self.assertTrue(report['is_valid'])
     error_count = report['stats']['error']
     self.assertEqual(0, error_count)
 def test_taxonomy_good(self):
     """DATA_TAXONOMY_GOOD must pass validation against SCHEMA_TAXONOMY."""
     taxonomy_schema = hxl.schema(SCHEMA_TAXONOMY)
     dataset = hxl.data(DATA_TAXONOMY_GOOD)
     self.assertTrue(taxonomy_schema.validate(dataset))
 def test_taxonomy_all(self):
     """Validate DATA_TAXONOMY_GOOD with SCHEMA_TAXONOMY_ALL; expect no errors."""
     compiled = hxl.schema(SCHEMA_TAXONOMY_ALL)
     outcome = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD), compiled)

     # Valid overall, and the error counter in the stats stays at zero
     self.assertTrue(outcome['is_valid'])
     self.assertEqual(0, outcome['stats']['error'])
 def test_load_default(self):
     """Load the default schema; it must have rules and accept DATA_GOOD."""
     schema = hxl.schema()
     # Comparison assertion shows the real rule count on failure.
     self.assertGreater(len(schema.rules), 0)
     self.assertTrue(schema.validate(hxl.data(DATA_GOOD)))
 def test_taxonomy_good(self):
     """Validation of DATA_TAXONOMY_GOOD under SCHEMA_TAXONOMY must succeed."""
     compiled = hxl.schema(SCHEMA_TAXONOMY)
     is_valid = compiled.validate(hxl.data(DATA_TAXONOMY_GOOD))
     self.assertTrue(is_valid)
 def test_load_bad(self):
     """Validation of DATA_BAD under SCHEMA_BASIC must be rejected."""
     compiled = hxl.schema(SCHEMA_BASIC)
     is_valid = compiled.validate(hxl.data(DATA_BAD))
     self.assertFalse(is_valid)
 def test_load_good(self):
     """Validation of DATA_GOOD under SCHEMA_BASIC must succeed."""
     compiled = hxl.schema(SCHEMA_BASIC)
     is_valid = compiled.validate(hxl.data(DATA_GOOD))
     self.assertTrue(is_valid)