def test_value_checks_datetime_range():
    """Check inclusive and exclusive datetime range checks on the same field."""
    date_format = '%Y-%m-%d'
    lower, upper = '1999-09-09', '2009-09-09'

    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check(
        'bar', datetime_range_inclusive(lower, upper, date_format))
    validator.add_value_check(
        'bar', datetime_range_exclusive(lower, upper, date_format))

    rows = (
        ('foo', 'bar'),
        ('A', '1999-09-10'),  # valid
        ('B', '1999-09-09'),  # invalid (exclusive)
        ('C', '2009-09-09'),  # invalid (exclusive)
        ('D', '1999-09-08'),  # invalid (both)
        ('E', '2009-09-10'),  # invalid (both)
    )

    problems = validator.validate(rows)
    assert len(problems) == 6, len(problems)

    # Expected number of problems per reported row number.
    expected_per_row = ((3, 1), (4, 1), (5, 2), (6, 2))
    for row, expected in expected_per_row:
        assert len([p for p in problems if p['row'] == row]) == expected
def test_ignore_lines():
    """Verify that ``ignore_lines`` skips leading lines before the header."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_header_check()
    validator.add_value_check('foo', int)
    validator.add_value_check('bar', float)

    rows = (
        ('ignore', 'me', 'please'),
        ('ignore', 'me', 'too', 'please'),
        ('foo', 'baz'),
        ('1.2', 'abc'),
    )

    problems = validator.validate(rows, ignore_lines=2)
    assert len(problems) == 3

    # The header is expected on row 3, after the two ignored lines.
    header_problems = [
        p for p in problems if p['code'] == HEADER_CHECK_FAILED
    ]
    assert len(header_problems) == 1
    assert header_problems[0]['row'] == 3

    # Both fields of the single data row (row 4) are expected to fail.
    value_problems = [
        p for p in problems if p['code'] == VALUE_CHECK_FAILED
    ]
    assert len(value_problems) == 2
    assert all(p['row'] == 4 for p in value_problems)
def test_value_check_enumeration():
    """Check ``enumeration()`` built from arguments and from a sequence."""
    validator = CSVValidator(('foo', 'bar', 'baz'))

    # enumeration defined directly with arguments
    validator.add_value_check('bar', enumeration('M', 'F'))

    # enumeration defined by passing in a list or tuple
    flavours = ('chocolate', 'vanilla', 'strawberry')
    validator.add_value_check('baz', enumeration(flavours))

    rows = (
        ('foo', 'bar', 'baz'),
        ('1', 'M', 'chocolate'),
        ('2', 'F', 'maple pecan'),
        ('3', 'X', 'strawberry'),
    )
    problems = validator.validate(rows)
    assert len(problems) == 2

    # Expected (row, column, field, value, record) for each problem, in order.
    expected = (
        (3, 3, 'baz', 'maple pecan', ('2', 'F', 'maple pecan')),
        (4, 2, 'bar', 'X', ('3', 'X', 'strawberry')),
    )
    for problem, (row, column, field, value, record) in zip(problems, expected):
        assert problem['code'] == VALUE_CHECK_FAILED
        assert problem['row'] == row
        assert problem['column'] == column
        assert problem['field'] == field
        assert problem['value'] == value
        assert problem['record'] == record
def test_ignore_lines():
    """Verify that validation can start after a number of ignored lines."""
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)
    validator.add_header_check()
    validator.add_value_check('foo', int)
    validator.add_value_check('bar', float)

    rows = (
        ('ignore', 'me', 'please'),
        ('ignore', 'me', 'too', 'please'),
        ('foo', 'baz'),
        ('1.2', 'abc'),
    )
    problems = validator.validate(rows, ignore_lines=2)
    assert len(problems) == 3

    # Split the problems by code in a single pass.
    header_problems = []
    value_problems = []
    for problem in problems:
        if problem['code'] == HEADER_CHECK_FAILED:
            header_problems.append(problem)
        if problem['code'] == VALUE_CHECK_FAILED:
            value_problems.append(problem)

    assert len(header_problems) == 1
    assert header_problems[0]['row'] == 3

    assert len(value_problems) == 2
    for problem in value_problems:
        assert problem['row'] == 4
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator checks the header, the record length, several individual
    fields, and the consistency of the two age fields.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` unless age_months falls in the age_years year."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # age_months must lie in [age_years * 12, age_years * 12 + 12).
        # A range comparison replaces ``age_months % age_years < 12``, which
        # divided by zero for infants (age_years == 0) and accepted
        # inconsistent pairs such as 2 years / 40 months.
        first_month = age_years * 12
        if not first_month <= age_months < first_month + 12:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
def test_value_checks_datetime_range():
    """Check that inclusive and exclusive datetime ranges flag the right rows."""
    bounds = ('1999-09-09', '2009-09-09', '%Y-%m-%d')

    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', datetime_range_inclusive(*bounds))
    validator.add_value_check('bar', datetime_range_exclusive(*bounds))

    rows = (
        ('foo', 'bar'),
        ('A', '1999-09-10'),  # valid
        ('B', '1999-09-09'),  # invalid (exclusive)
        ('C', '2009-09-09'),  # invalid (exclusive)
        ('D', '1999-09-08'),  # invalid (both)
        ('E', '2009-09-10'),  # invalid (both)
    )
    problems = validator.validate(rows)
    assert len(problems) == 6, len(problems)

    def count_for(row):
        """Return how many problems were reported for the given row number."""
        return len([p for p in problems if p['row'] == row])

    assert count_for(3) == 1
    assert count_for(4) == 1
    assert count_for(5) == 2
    assert count_for(6) == 2
def generate(self):
    """Build a ``CSVValidator`` from this object's configuration.

    Reads ``self.field_names``, ``self.value_checks`` (an iterable of
    two-item pairs passed to ``add_value_check``) and ``self.unique_checks``.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    csv_validator = CSVValidator(self.field_names)

    # Structural checks first, then per-field checks, then uniqueness.
    csv_validator.add_header_check()
    csv_validator.add_record_length_check()
    for field, value_check in self.value_checks:
        csv_validator.add_value_check(field, value_check)
    csv_validator.add_unique_check(self.unique_checks)

    return csv_validator
def test_guard_conditions():
    """Ensure a non-callable value check is rejected with AssertionError."""
    validator = CSVValidator(('foo', 'bar'))

    rejected = False
    try:
        validator.add_value_check('foo', 'i am not callable')
    except AssertionError:
        rejected = True  # expected

    assert rejected, 'expected exception'
def test_guard_conditions():
    """Check that registering a non-callable value check raises AssertionError."""
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)

    raised = True
    try:
        validator.add_value_check('foo', 'i am not callable')
        raised = False  # only reached when no exception was raised
    except AssertionError:
        pass  # expected

    assert raised, 'expected exception'
def validate_csv(csv):
    """Validate CSV data against the provider/medicine column layout.

    Args:
        csv: The data to validate; passed unchanged to
            ``CSVValidator.validate``.

    Returns:
        The result of ``CSVValidator.validate`` for ``csv``.
    """
    columns = ("provider_id", "kind", "name", "dosage", "measure", "amount")
    checker = CSVValidator(columns)

    # The first element of each Medicine.KIND entry is the accepted value.
    allowed_kinds = [choice[0] for choice in Medicine.KIND]

    checker.add_header_check("HEADER", "bad header")
    checker.add_value_check(
        "provider_id", int, "PROVIDER", "provider_id must be an integer"
    )
    # NOTE(review): the "kind" check reuses the "PROVIDER" problem code --
    # confirm whether a dedicated code was intended.
    checker.add_value_check(
        "kind",
        enumeration(*allowed_kinds),
        "PROVIDER",
        f"kind must be {str(allowed_kinds)}",
    )

    return checker.validate(csv)
def test_skips():
    """Check that records matched by a skip function are not validated."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_record_length_check()
    validator.add_value_check('foo', int)

    def skip_pragma(record):
        """Return True for records whose first value starts with '##'."""
        first_value = record[0]
        return first_value.startswith('##')
    validator.add_skip(skip_pragma)

    rows = (
        ('foo', 'bar'),
        ('1', 'X'),
        # would fail both the length and the int check if it were not skipped
        ('## this row', 'should be', 'skipped'),
        ('3', 'Y'),
    )

    problems = validator.validate(rows)
    assert len(problems) == 0, problems
def test_value_check_search_pattern():
    """Test value checks with the search_pattern() function."""
    field_names = ('foo', 'bar')
    validator = CSVValidator(field_names)

    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    validator.add_value_check('bar', search_pattern(r'\d{4}-\d{2}-\d{2}'))

    data = (
        ('foo', 'bar'),
        ('1', '1999-01-01'),
        ('2', 'abcd-ef-gh'),
        ('3', 'a1999-01-01'),  # this is valid - pattern attempts to match anywhere in line
        ('4', '1999-01-01a'),  # this is valid - pattern attempts to match anywhere in line
    )

    problems = validator.validate(data)
    assert len(problems) == 1, len(problems)
    assert problems[0]['code'] == VALUE_CHECK_FAILED
    assert problems[0]['row'] == 3
def test_value_check_search_pattern():
    """Test value checks with the search_pattern() function."""
    field_names = ('foo', 'bar')
    validator = CSVValidator(field_names)

    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    validator.add_value_check('bar', search_pattern(r'\d{4}-\d{2}-\d{2}'))

    data = (
        ('foo', 'bar'),
        ('1', '1999-01-01'),
        ('2', 'abcd-ef-gh'),
        ('3', 'a1999-01-01'),  # this is valid - pattern attempts to match anywhere in line
        ('4', '1999-01-01a'),  # this is valid - pattern attempts to match anywhere in line
    )

    problems = validator.validate(data)
    assert len(problems) == 1, len(problems)
    assert problems[0]['code'] == VALUE_CHECK_FAILED
    assert problems[0]['row'] == 3
def test_value_checks_with_missing_values():
    """Pin the behaviour of value checks when a record lacks the checked field."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', float)

    rows = (
        ('foo', 'bar'),
        ('12',),  # no value for bar, so the float check has nothing to test
    )

    problems = validator.validate(rows)

    # missing values are ignored - use record length checks to find these
    assert len(problems) == 0
def test_value_checks_with_missing_values():
    """Establish that a value check reports nothing for an absent value."""
    columns = ('foo', 'bar')
    short_record = ('12',)  # this is missing value for bar

    validator = CSVValidator(columns)
    validator.add_value_check('bar', float)

    problems = validator.validate((columns, short_record))

    # missing values are ignored - use record length checks to find these
    assert len(problems) == 0
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator checks the header, the record length, several individual
    fields, and the consistency of the two age fields.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` unless age_months falls in the age_years year."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # age_months must lie in [age_years * 12, age_years * 12 + 12).
        # A range comparison replaces ``age_months % age_years < 12``, which
        # divided by zero for infants (age_years == 0) and accepted
        # inconsistent pairs such as 2 years / 40 months.
        first_month = age_years * 12
        if not first_month <= age_months < first_month + 12:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
def test_value_check_numeric_ranges():
    """Check inclusive/exclusive numeric range checks for ints and floats."""
    validator = CSVValidator(('foo', 'bar', 'baz', 'quux'))
    range_checks = (
        ('foo', number_range_inclusive(2, 6, int)),
        ('bar', number_range_exclusive(2, 6, int)),
        ('baz', number_range_inclusive(2.0, 6.3, float)),
        ('quux', number_range_exclusive(2.0, 6.3, float)),
    )
    for field, check in range_checks:
        validator.add_value_check(field, check)

    rows = (
        ('foo', 'bar', 'baz', 'quux'),
        ('2', '3', '2.0', '2.1'),  # valid
        ('1', '3', '2.0', '2.1'),  # foo invalid
        ('2', '2', '2.0', '2.1'),  # bar invalid
        ('2', '3', '1.9', '2.1'),  # baz invalid
        ('2', '3', '2.0', '2.0'),  # quux invalid
    )

    problems = validator.validate(rows)
    assert len(problems) == 4, len(problems)
    assert all(p['code'] == VALUE_CHECK_FAILED for p in problems)

    # One problem per invalid row, in row order.
    expected = ((3, 'foo'), (4, 'bar'), (5, 'baz'), (6, 'quux'))
    for problem, (row, field) in zip(problems, expected):
        assert problem['row'] == row and problem['field'] == field
def test_value_check_numeric_ranges():
    """Verify the numeric range helpers flag out-of-range values per field."""
    columns = ('foo', 'bar', 'baz', 'quux')
    validator = CSVValidator(columns)

    validator.add_value_check('foo', number_range_inclusive(2, 6, int))
    validator.add_value_check('bar', number_range_exclusive(2, 6, int))
    validator.add_value_check('baz', number_range_inclusive(2.0, 6.3, float))
    validator.add_value_check('quux', number_range_exclusive(2.0, 6.3, float))

    rows = (
        columns,
        ('2', '3', '2.0', '2.1'),  # valid
        ('1', '3', '2.0', '2.1'),  # foo invalid
        ('2', '2', '2.0', '2.1'),  # bar invalid
        ('2', '3', '1.9', '2.1'),  # baz invalid
        ('2', '3', '2.0', '2.0'),  # quux invalid
    )

    problems = validator.validate(rows)
    assert len(problems) == 4, len(problems)

    # Problem i is expected on row i + 3 for the i-th field.
    for index, problem in enumerate(problems):
        assert problem['code'] == VALUE_CHECK_FAILED
    for index, field in enumerate(columns):
        found = problems[index]
        assert found['row'] == index + 3 and found['field'] == field
def test_value_check_match_pattern():
    """Test value checks with the match_pattern() function."""
    field_names = ('foo', 'bar')
    validator = CSVValidator(field_names)

    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    validator.add_value_check('bar', match_pattern(r'\d{4}-\d{2}-\d{2}'))

    data = (
        ('foo', 'bar'),
        ('1', '1999-01-01'),
        ('2', 'abcd-ef-gh'),
        ('3', 'a1999-01-01'),
        ('4', '1999-01-01a'),  # this is valid - pattern attempts to match at beginning of line
    )

    problems = validator.validate(data)
    assert len(problems) == 2, len(problems)
    for p in problems:
        assert p['code'] == VALUE_CHECK_FAILED

    assert problems[0]['row'] == 3
    assert problems[1]['row'] == 4
def test_skips():
    """Verify that a skip function excludes matching records from validation."""
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)
    validator.add_record_length_check()
    validator.add_value_check('foo', int)

    def skip_pragma(record):
        """Tell the validator to skip records that start with '##'."""
        return record[0].startswith('##')

    validator.add_skip(skip_pragma)

    pragma_row = ('## this row', 'should be', 'skipped')
    rows = (columns, ('1', 'X'), pragma_row, ('3', 'Y'))

    problems = validator.validate(rows)
    assert len(problems) == 0, problems
def test_value_check_match_pattern():
    """Test value checks with the match_pattern() function."""
    field_names = ('foo', 'bar')
    validator = CSVValidator(field_names)

    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    validator.add_value_check('bar', match_pattern(r'\d{4}-\d{2}-\d{2}'))

    data = (
        ('foo', 'bar'),
        ('1', '1999-01-01'),
        ('2', 'abcd-ef-gh'),
        ('3', 'a1999-01-01'),
        ('4', '1999-01-01a'),  # this is valid - pattern attempts to match at beginning of line
    )

    problems = validator.validate(data)
    assert len(problems) == 2, len(problems)
    for p in problems:
        assert p['code'] == VALUE_CHECK_FAILED

    assert problems[0]['row'] == 3
    assert problems[1]['row'] == 4
def test_value_checks_datetime_parser():
    """Test value checks with the datetime parser using default options."""
    field_names = ('foo', 'bar')
    validator = CSVValidator(field_names)
    validator.add_value_check('bar', datetime_parse())

    data = (
        ('foo', 'bar'),
        ('A', '1999-09-09'),  # valid
        ('B', '31-12-2009'),  # valid
        ('C', '12-31-2032'),  # valid
        ('D', '1999-09-09ss'),  # invalid string
    )

    problems = validator.validate(data)
    # No debug print here: the assertion message already shows the problems
    # when the count is unexpected.
    assert len(problems) == 1, problems
    for p in problems:
        assert p['code'] == VALUE_CHECK_FAILED

    assert problems[0]['row'] == 5 and problems[0]['field'] == 'bar'
def test_value_checks_datetime_parser_year():
    """Check ``datetime_parse(yearfirst=True)`` rejects malformed dates."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', datetime_parse(yearfirst=True))

    rows = (
        ('foo', 'bar'),
        ('A', '1999-09-09'),  # valid
        ('B', '1999-13-09'),  # invalid month
        ('C', '1999-09-32'),  # invalid day
        ('D', '1999-09-09ss'),  # invalid string
    )

    problems = validator.validate(rows)
    assert len(problems) == 3, problems
    assert all(p['code'] == VALUE_CHECK_FAILED for p in problems)

    # Rows 3 to 5 each yield one problem on 'bar', in order.
    for problem, row in zip(problems, (3, 4, 5)):
        assert problem['row'] == row and problem['field'] == 'bar'
def test_value_checks_datetime_parser_day():
    """Check ``datetime_parse(dayfirst=True)`` against valid and invalid dates."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', datetime_parse(dayfirst=True))

    rows = (
        ('foo', 'bar'),
        ('A', '01-09-1991'),  # valid
        ('B', '05-24-1999'),  # valid
        ('C', '33-01-1991'),  # invalid day
        ('D', '1999-09-09ss'),  # invalid string
        ('E', '31-12-1999'),  # valid
    )

    problems = validator.validate(rows)
    assert len(problems) == 2, problems
    assert all(p['code'] == VALUE_CHECK_FAILED for p in problems)

    # Only rows 4 and 5 are expected to be reported, in that order.
    first, second = problems[0], problems[1]
    assert first['row'] == 4 and first['field'] == 'bar'
    assert second['row'] == 5 and second['field'] == 'bar'
def test_value_checks_datetime():
    """Check ``datetime_string`` value checks with a fixed date format."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', datetime_string('%Y-%m-%d'))

    rows = (
        ('foo', 'bar'),
        ('A', '1999-09-09'),  # valid
        ('B', '1999-13-09'),  # invalid month
        ('C', '1999-09-32'),  # invalid day
        ('D', '1999-09-09ss'),  # invalid string
    )

    problems = validator.validate(rows)
    assert len(problems) == 3, problems
    assert all(p['code'] == VALUE_CHECK_FAILED for p in problems)

    # One problem on 'bar' for each of rows 3, 4 and 5, in order.
    expected_rows = (3, 4, 5)
    for position, row in enumerate(expected_rows):
        problem = problems[position]
        assert problem['row'] == row and problem['field'] == 'bar'
def test_value_check_enumeration():
    """Verify ``enumeration()`` checks and the details of reported problems."""
    columns = ('foo', 'bar', 'baz')
    validator = CSVValidator(columns)

    # an enumeration given as separate arguments
    validator.add_value_check('bar', enumeration('M', 'F'))

    # an enumeration given as a single list or tuple
    flavours = ('chocolate', 'vanilla', 'strawberry')
    validator.add_value_check('baz', enumeration(flavours))

    bad_flavour = ('2', 'F', 'maple pecan')
    bad_gender = ('3', 'X', 'strawberry')
    rows = (columns, ('1', 'M', 'chocolate'), bad_flavour, bad_gender)

    problems = validator.validate(rows)
    assert len(problems) == 2

    def assert_problem(problem, row, column, field, value, record):
        """Assert every reported detail of a failed value check."""
        assert problem['code'] == VALUE_CHECK_FAILED
        assert problem['row'] == row
        assert problem['column'] == column
        assert problem['field'] == field
        assert problem['value'] == value
        assert problem['record'] == record

    assert_problem(problems[0], 3, 3, 'baz', 'maple pecan', bad_flavour)
    assert_problem(problems[1], 4, 2, 'bar', 'X', bad_gender)
def create_validator():
    """Create a CSV validator for customer contact records.

    The validator checks the header, the record length, and one value check
    per column.

    Returns:
        The configured ``CSVValidator`` instance.
    """

    def check_alpha(value=''):
        """Raise ``ValueError`` unless value is empty or letters and spaces.

        The checks registered with ``add_value_check`` here (``int``,
        ``str``) signal failure by raising, so this check raises too; a
        ``False`` return value would not be reported as a problem.
        """
        if value and not value.replace(" ", "").isalpha():
            raise ValueError(value)

    field_names = (
        'CUSTID',
        'FIRSTNAME',
        'LASTNAME',
        'CUSTNM',
        'ADDRESS1',
        'ADDRESS2',
        'POSTCODE',
        'CITY',
        'STATE',
        'WORKPHONE',
        'WORKFAX',
        'PHONE',
        'MOBILE',
        'EMAIL',
        'ORGANISATION',
        'EOL',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('CUSTID', int,
                              'EX3', 'CUSTID must be an integer')
    validator.add_value_check('FIRSTNAME', check_alpha,
                              'EX4', 'FIRSTNAME must be alphabetic')
    validator.add_value_check('LASTNAME', str, 'EX5', 'invalid LASTNAME')
    validator.add_value_check('CUSTNM', str, 'EX6', 'invalid CUSTNM')
    validator.add_value_check('ADDRESS1', str, 'EX7', 'invalid ADDRESS1')
    validator.add_value_check('ADDRESS2', str, 'EX8', 'invalid ADDRESS2')
    validator.add_value_check('POSTCODE', int, 'EX9', 'invalid POSTCODE')
    validator.add_value_check('CITY', str, 'EX10', 'invalid CITY')
    validator.add_value_check('STATE', str, 'EX11', 'invalid STATE')
    validator.add_value_check('WORKPHONE', int, 'EX12', 'invalid WORKPHONE')
    validator.add_value_check('WORKFAX', int, 'EX13', 'invalid WORKFAX')
    validator.add_value_check('PHONE', int, 'EX14', 'invalid PHONE')
    validator.add_value_check('MOBILE', int, 'EX15', 'invalid MOBILE')
    validator.add_value_check('EMAIL', str, 'EX16', 'invalid EMAIL')
    validator.add_value_check('ORGANISATION', str,
                              'EX17', 'invalid ORGANISATION')
    validator.add_value_check('EOL', str, 'EX18', 'invalid EOL')

    # NOTE(review): the former "age variables" record check was removed. It
    # converted CUSTNM and ADDRESS1 with int() and compared them with age
    # arithmetic, so it failed on every non-numeric name or address and
    # reused problem code 'EX8'. Add a record check here only if a real
    # cross-field rule exists for this layout.

    return validator
def test_exception_handling():
    """Pin how exceptions raised inside user-supplied callbacks are reported."""
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('foo', int)

    # Every callback below raises a plain Exception on purpose. Names and
    # docstrings are kept as they are in case they appear in problem reports.

    def buggy_value_check(v):
        """I am a buggy value check."""
        raise Exception('something went wrong')
    validator.add_value_check('bar', buggy_value_check)

    def buggy_value_predicate(v):
        """I am a buggy value predicate."""
        raise Exception('something went wrong')
    validator.add_value_predicate('bar', buggy_value_predicate)

    def buggy_record_check(r):
        """I am a buggy record check."""
        raise Exception('something went wrong')
    validator.add_record_check(buggy_record_check)

    def buggy_record_predicate(r):
        """I am a buggy record predicate."""
        raise Exception('something went wrong')
    validator.add_record_predicate(buggy_record_predicate)

    def buggy_assert(r):
        """I am a buggy assert."""
        raise Exception('something went wrong')
    validator.assert_something_buggy = buggy_assert

    def buggy_check(r):
        """I am a buggy check."""
        raise Exception('something went wrong')
    validator.check_something_buggy = buggy_check

    def buggy_each(r):
        """I am a buggy each."""
        raise Exception('something went wrong')
    validator.each_something_buggy = buggy_each

    def buggy_finally_assert():
        """I am a buggy finally assert."""
        raise Exception('something went wrong')
    validator.finally_assert_something_buggy = buggy_finally_assert

    def buggy_skip(record):
        """I am a buggy skip."""
        raise Exception('something went wrong')
    validator.add_skip(buggy_skip)

    rows = (
        ('foo', 'bar'),
        ('ab', '56'),
    )

    # With reporting of unexpected exceptions switched off, a single problem
    # on row 2 is expected.
    quiet_problems = validator.validate(rows,
                                        report_unexpected_exceptions=False)
    assert len(quiet_problems) == 1, len(quiet_problems)
    assert quiet_problems[0]['row'] == 2

    # by default, exceptions are reported as problems
    all_problems = validator.validate(rows)
    assert len(all_problems) == 10, len(all_problems)

    unexpected_problems = [
        problem for problem in all_problems
        if problem['code'] == UNEXPECTED_EXCEPTION
    ]
    assert len(unexpected_problems) == 9
    for problem in unexpected_problems:
        error = problem['exception']
        assert error.args[0] == 'something went wrong', error.args
def test_value_checks():
    """Exercise int/float value checks and the details reported per failure."""
    # a simple validator to be tested
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('foo', int)
    validator.add_value_check('bar', float)

    # some test data
    rows = (
        ('foo', 'bar'),  # row 1 - header row
        ('12', '3.4'),  # row 2 - valid
        ('1.2', '3.4'),  # row 3 - foo invalid
        ('abc', '3.4'),  # row 4 - foo invalid
        ('12', 'abc'),  # row 5 - bar invalid
        ('', '3.4'),  # row 6 - foo invalid (empty)
        ('12', ''),  # row 7 - bar invalid (empty)
        ('abc', 'def'),  # row 8 - both invalid
    )

    # run the validator on the test data
    problems = validator.validate(rows)
    assert len(problems) == 7

    def problems_in_row(row):
        """Return the problems reported for the given row number."""
        return [p for p in problems if p['row'] == row]

    def assert_problem(problem, column, field, value, record):
        """Assert location, default code/message, bad value and record."""
        assert problem['column'] == column  # report column index
        assert problem['field'] == field  # report field name
        assert problem['code'] == VALUE_CHECK_FAILED  # default problem code
        assert problem['message'] == MESSAGES[VALUE_CHECK_FAILED]  # default
        assert problem['value'] == value  # report bad value
        assert problem['record'] == record  # report record

    # N.B., expect row and column indices start from 1
    assert len(problems_in_row(2)) == 0  # should be valid

    # rows with exactly one problem: (row, column, field, value, record)
    single_problem_rows = (
        (3, 1, 'foo', '1.2', ('1.2', '3.4')),
        (4, 1, 'foo', 'abc', ('abc', '3.4')),
        (5, 2, 'bar', 'abc', ('12', 'abc')),
        (6, 1, 'foo', '', ('', '3.4')),
        (7, 2, 'bar', '', ('12', '')),
    )
    for row, column, field, value, record in single_problem_rows:
        found = problems_in_row(row)
        assert len(found) == 1
        assert_problem(found[0], column, field, value, record)

    row8 = problems_in_row(8)
    assert len(row8) == 2  # expect both problems are found
    assert_problem(row8[0], 1, 'foo', 'abc', ('abc', 'def'))
    assert_problem(row8[1], 2, 'bar', 'def', ('abc', 'def'))
def test_exception_handling():
    """Establish how unexpected exceptions in callbacks surface as problems."""
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)
    validator.add_value_check('foo', int)

    # Each callback raises a plain Exception on purpose. Names and docstrings
    # are left unchanged in case they appear in problem reports.

    def buggy_value_check(v):
        """I am a buggy value check."""
        raise Exception('something went wrong')

    def buggy_value_predicate(v):
        """I am a buggy value predicate."""
        raise Exception('something went wrong')

    def buggy_record_check(r):
        """I am a buggy record check."""
        raise Exception('something went wrong')

    def buggy_record_predicate(r):
        """I am a buggy record predicate."""
        raise Exception('something went wrong')

    def buggy_assert(r):
        """I am a buggy assert."""
        raise Exception('something went wrong')

    def buggy_check(r):
        """I am a buggy check."""
        raise Exception('something went wrong')

    def buggy_each(r):
        """I am a buggy each."""
        raise Exception('something went wrong')

    def buggy_finally_assert():
        """I am a buggy finally assert."""
        raise Exception('something went wrong')

    def buggy_skip(record):
        """I am a buggy skip."""
        raise Exception('something went wrong')

    # Register the callbacks in the same order as before.
    validator.add_value_check('bar', buggy_value_check)
    validator.add_value_predicate('bar', buggy_value_predicate)
    validator.add_record_check(buggy_record_check)
    validator.add_record_predicate(buggy_record_predicate)
    validator.assert_something_buggy = buggy_assert
    validator.check_something_buggy = buggy_check
    validator.each_something_buggy = buggy_each
    validator.finally_assert_something_buggy = buggy_finally_assert
    validator.add_skip(buggy_skip)

    rows = (columns, ('ab', '56'))

    # Unexpected exceptions suppressed: one problem, on row 2.
    suppressed = validator.validate(rows, report_unexpected_exceptions=False)
    suppressed_count = len(suppressed)
    assert suppressed_count == 1, suppressed_count
    assert suppressed[0]['row'] == 2

    # by default, exceptions are reported as problems
    reported = validator.validate(rows)
    reported_count = len(reported)
    assert reported_count == 10, reported_count

    unexpected_problems = []
    for problem in reported:
        if problem['code'] == UNEXPECTED_EXCEPTION:
            unexpected_problems.append(problem)
    assert len(unexpected_problems) == 9

    for problem in unexpected_problems:
        raised = problem['exception']
        assert raised.args[0] == 'something went wrong', raised.args
def test_value_checks():
    """Run simple int/float value checks and verify each reported problem."""
    # a simple validator to be tested
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)
    validator.add_value_check('foo', int)
    validator.add_value_check('bar', float)

    # some test data
    rows = (
        columns,  # row 1 - header row
        ('12', '3.4'),  # row 2 - valid
        ('1.2', '3.4'),  # row 3 - foo invalid
        ('abc', '3.4'),  # row 4 - foo invalid
        ('12', 'abc'),  # row 5 - bar invalid
        ('', '3.4'),  # row 6 - foo invalid (empty)
        ('12', ''),  # row 7 - bar invalid (empty)
        ('abc', 'def'),  # row 8 - both invalid
    )

    # run the validator on the test data
    problems = validator.validate(rows)
    assert len(problems) == 7

    # Expected problems per row as (column, field, value) triples.
    # N.B., expect row and column indices start from 1
    expected_by_row = {
        2: [],  # should be valid
        3: [(1, 'foo', '1.2')],
        4: [(1, 'foo', 'abc')],
        5: [(2, 'bar', 'abc')],
        6: [(1, 'foo', '')],
        7: [(2, 'bar', '')],
        8: [(1, 'foo', 'abc'), (2, 'bar', 'def')],  # both problems found
    }

    for row in sorted(expected_by_row):
        expected = expected_by_row[row]
        found = [p for p in problems if p['row'] == row]
        assert len(found) == len(expected)

        record = rows[row - 1]  # row numbers are 1-based
        for problem, (column, field, value) in zip(found, expected):
            assert problem['column'] == column
            assert problem['field'] == field
            assert problem['code'] == VALUE_CHECK_FAILED
            assert problem['message'] == MESSAGES[VALUE_CHECK_FAILED]
            assert problem['value'] == value
            assert problem['record'] == record