def test_different_indicator_datatypes(self):
        """One rule, but three different indicators with different tagspecs and datatypes"""
        schema = [
            ['#valid_tag', '#valid_datatype+consistent'],
            ['#indicator', 'true'],
        ]
        header = ['#indicator+xxx', '#indicator+yyy', '#indicator+zzz']
        # +yyy holds text in the first row and is blank afterwards: consistent.
        good_data = [header] + [
            [str(n), 'aaa' if n == 100 else '', str(n)]
            for n in range(100, 600, 100)
        ]
        # +yyy mixes text ('aaa') with numbers: exactly one consistency issue.
        bad_data = [header] + [
            [str(n), 'aaa' if n == 100 else str(n // 100), str(n)]
            for n in range(100, 600, 100)
        ]

        report = hxl.validate(good_data, schema)
        self.assertTrue(report['is_valid'])
        self.assertEqual(0, report['stats']['total'])

        report = hxl.validate(bad_data, schema)
        self.assertFalse(report['is_valid'])
        self.assertEqual(1, report['stats']['total'])
 def test_taxonomy_bad(self):
     """A value missing from the external taxonomy is a hard (local) error."""
     result = hxl.validate(
         hxl.data(DATA_TAXONOMY_BAD),
         hxl.schema(SCHEMA_TAXONOMY))
     self.assertFalse(result['is_valid'])
     self.assertEqual(1, result['stats']['error'])
     self.assertEqual(0, result['stats']['external'])
     self.assertEqual(1, len(result['issues']))
     self.assertEqual(0, len(result['external_issues']))
 def test_taxonomy_bad(self):
     """Validation against a taxonomy that the data fails to match."""
     schema = hxl.schema(SCHEMA_TAXONOMY)
     report = hxl.validate(hxl.data(DATA_TAXONOMY_BAD), schema)
     # Exactly one local error; nothing deferred to external issues.
     self.assertFalse(report['is_valid'])
     self.assertEqual(1, report['stats']['error'])
     self.assertEqual(1, len(report['issues']))
     self.assertEqual(0, report['stats']['external'])
     self.assertEqual(0, len(report['external_issues']))
 def test_spellings_multiple(self):
     """Two columns with mutually-swapped outlier spellings each get flagged."""
     schema = [
         ['#valid_tag', '#valid_value+spelling'],
         ['#indicator', 'true'],
     ]
     common_rows = [['aaaaa', 'aaaab'] for _ in range(0, 50)]
     data = [['#indicator+xxx', '#indicator+yyy']] + common_rows + [['aaaab', 'aaaaa']]
     report = hxl.validate(data, schema)
     self.assertFalse(report['is_valid'])
     self.assertEqual(2, report['stats']['total'])
 def test_spellings_multiple(self):
     """One rare spelling per column should yield two spelling issues."""
     SCHEMA = [['#valid_tag', '#valid_value+spelling'],
               ['#indicator', 'true']]
     rows = [['#indicator+xxx', '#indicator+yyy']]
     for _ in range(50):
         rows.append(['aaaaa', 'aaaab'])
     rows.append(['aaaab', 'aaaaa'])
     report = hxl.validate(rows, SCHEMA)
     self.assertFalse(report['is_valid'])
     self.assertEqual(2, report['stats']['total'])
 def test_taxonomy_missing(self):
     """Handle a missing external taxonomy."""
     report = hxl.validate(
         hxl.data(DATA_TAXONOMY_GOOD),
         hxl.schema(SCHEMA_TAXONOMY_MISSING))
     # Data still validates; the unreachable taxonomy becomes an external issue.
     self.assertTrue(report['is_valid'])
     self.assertIn('external_issues', report)
     self.assertEqual(0, report['stats']['error'])
     self.assertEqual(1, report['stats']['external'])
     self.assertEqual(0, len(report['issues']))
     self.assertEqual(1, len(report['external_issues']))
 def test_taxonomy_missing(self):
     """A schema pointing at an unreachable taxonomy still validates the data."""
     schema = hxl.schema(SCHEMA_TAXONOMY_MISSING)
     result = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD), schema)
     self.assertTrue(result['is_valid'])
     self.assertTrue('external_issues' in result)
     # No hard errors; exactly one external (taxonomy-load) issue.
     self.assertEqual(0, result['stats']['error'])
     self.assertEqual(0, len(result['issues']))
     self.assertEqual(1, result['stats']['external'])
     self.assertEqual(1, len(result['external_issues']))
    def test_different_indicator_datatypes(self):
        """One rule, but three different indicators with different tagspecs and datatypes"""
        schema = [
            ['#valid_tag', '#valid_datatype+consistent'],
            ['#indicator', 'true'],
        ]
        tags = ['#indicator+xxx', '#indicator+yyy', '#indicator+zzz']
        good_rows = [
            ['100', 'aaa', '100'],
            ['200', '', '200'],
            ['300', '', '300'],
            ['400', '', '400'],
            ['500', '', '500'],
        ]
        bad_rows = [
            ['100', 'aaa', '100'],
            ['200', '2', '200'],
            ['300', '3', '300'],
            ['400', '4', '400'],
            ['500', '5', '500'],
        ]

        # Consistent datatypes per column: no issues at all.
        report = hxl.validate([tags] + good_rows, schema)
        self.assertTrue(report['is_valid'])
        self.assertEqual(0, report['stats']['total'])

        # The +yyy column mixes text with numbers: one issue.
        report = hxl.validate([tags] + bad_rows, schema)
        self.assertFalse(report['is_valid'])
        self.assertEqual(1, report['stats']['total'])
 def test_double_correlation(self):
     """Test correlation when more than one column has same tagspec"""
     schema = [
         ['#valid_tag', '#description', '#valid_correlation', '#valid_value+list'],
         ['#adm1+code', 'xxxxx', '#adm1+name', 'X001|X002'],
     ]
     dataset = [['#adm1+name', '#adm1+code', '#adm1+code'],
                ['Coast', 'X001', 'X001']]
     dataset += [['Plains', 'X002', 'X02']] * 4
     report = hxl.validate(dataset, schema)
     # Four bad 'X02' values in the second #adm1+code column.
     self.assertEqual(4, report['stats']['total'])
 def test_double_correlation(self):
     """Test correlation when more than one column has same tagspec"""
     SCHEMA = [
         ['#valid_tag', '#description', '#valid_correlation', '#valid_value+list'],
         ['#adm1+code', 'xxxxx', '#adm1+name', 'X001|X002'],
     ]
     bad_row = ['Plains', 'X002', 'X02']
     DATASET = [
         ['#adm1+name', '#adm1+code', '#adm1+code'],
         ['Coast', 'X001', 'X001'],
         bad_row, bad_row, bad_row, bad_row,
     ]
     # All four repeated rows carry the bad 'X02' code.
     self.assertEqual(4, hxl.validate(DATASET, SCHEMA)['stats']['total'])
 def test_taxonomy_all(self):
     """A dataset satisfying every taxonomy rule yields no errors."""
     result = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD),
                           hxl.schema(SCHEMA_TAXONOMY_ALL))
     self.assertTrue(result['is_valid'])
     self.assertEqual(0, result['stats']['error'])
# Example 12
import hxl

# Local HXL dataset to validate (UNHCR population-statistics export).
DATA_FILE = 'data/unhcr_popstats_export_persons_of_concern_all_data.hxl'

# Validate the file; allow_local=True permits reading from the local
# filesystem. NOTE(review): the validation report returned by
# hxl.validate() is discarded — presumably a demo/smoke snippet.
hxl.validate(hxl.io.make_input(DATA_FILE, allow_local=True))
 def test_top_level(self):
     """Use the package-level alias"""
     # Smoke test only: passing means hxl.validate() is reachable as a
     # package-level alias and did not raise. No assertions on the report.
     report = hxl.validate(self.DATA)
 def test_taxonomy_all(self):
     """Validation against the full taxonomy schema passes cleanly."""
     schema = hxl.schema(SCHEMA_TAXONOMY_ALL)
     report = hxl.validate(hxl.data(DATA_TAXONOMY_GOOD), schema)
     self.assertEqual(0, report['stats']['error'])
     self.assertTrue(report['is_valid'])
 def test_top_level(self):
     """Use the package-level alias"""
     # Smoke test only: verifies the package-level hxl.validate alias is
     # callable without raising. The report itself is not inspected.
     report = hxl.validate(self.DATA)
# Example 16
def run_validation(url, content, content_hash, sheet_index, selector,
                   schema_url, schema_content, schema_content_hash,
                   schema_sheet_index, include_dataset):
    """ Do the actual validation run, using the arguments provided.
    Separated from the controller so that we can cache the result easier.
    The *_hash arguments exist only to assist with caching.

    Exactly one of url/content must be supplied; at most one of
    schema_url/schema_content. If neither schema source is given, the
    dataset is validated against the library's default schema.
    @returns: a validation report, suitable for returning as JSON.
    """

    # test for opening error conditions
    # NOTE(review): requests.exceptions has no BadRequest class — raising
    # these lines would themselves fail with AttributeError. This probably
    # means werkzeug.exceptions.BadRequest (hxl-proxy is Flask-based) —
    # confirm against the module's imports.
    if (url is not None and content is not None):
        raise requests.exceptions.BadRequest(
            "Both 'url' and 'content' specified")
    if (url is None and content is None):
        raise requests.exceptions.BadRequest(
            "Require one of 'url' or 'content'")
    if (schema_url is not None and schema_content is not None):
        raise requests.exceptions.BadRequest(
            "Both 'schema_url' and 'schema_content' specified")

    # set up the main data
    if content:
        source = hxl.data(
            hxl.io.make_input(content,
                              sheet_index=sheet_index,
                              selector=selector))
    else:
        source = hxl.data(url,
                          sheet_index=sheet_index,
                          http_headers={'User-Agent': 'hxl-proxy/validation'})

    # cache if we're including the dataset in the results (we have to run over it twice)
    if include_dataset:
        source = source.cache()

    # set up the schema (if present)
    if schema_content:
        # NOTE(review): this reuses the *data* selector for the schema
        # input — looks intentional-but-odd; verify there is no separate
        # schema selector that should be passed here.
        schema_source = hxl.data(
            hxl.io.make_input(schema_content,
                              sheet_index=schema_sheet_index,
                              selector=selector))
    elif schema_url:
        schema_source = hxl.data(
            schema_url,
            sheet_index=schema_sheet_index,
            http_headers={'User-Agent': 'hxl-proxy/validation'})
    else:
        # None makes hxl.validate fall back to its default schema.
        schema_source = None

    # Validate the dataset
    report = hxl.validate(source, schema_source)

    # add the URLs if supplied
    if url:
        report['data_url'] = url
    if sheet_index is not None:
        report['data_sheet_index'] = sheet_index
    if schema_url:
        report['schema_url'] = schema_url
    if schema_sheet_index is not None:
        report['schema_sheet_index'] = schema_sheet_index

    # include the original dataset if requested
    if include_dataset:
        # Rebuild the dataset as rows: header row, tag row, then values,
        # with None values normalized for JSON serialization.
        content = []
        content.append([
            hxl_proxy.util.no_none(column.header) for column in source.columns
        ])
        content.append([
            hxl_proxy.util.no_none(column.display_tag)
            for column in source.columns
        ])
        for row in source:
            content.append(
                [hxl_proxy.util.no_none(value) for value in row.values])
        report['dataset'] = content

    return report
import hxl

# Local HXL dataset to validate (UNHCR population-statistics export).
DATA_FILE='data/unhcr_popstats_export_persons_of_concern_all_data.hxl'

# Validate the file; allow_local=True permits reading from the local
# filesystem. NOTE(review): the returned report is discarded —
# presumably a demo/smoke snippet.
hxl.validate(hxl.io.make_input(DATA_FILE, allow_local=True))