def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator registers a header check, a record length check, value
    checks on individual fields, and one record check that cross-validates
    the ``age_years`` and ``age_months`` fields.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # age_months is the total age in months, so it has to fall inside
        # the 12-month window that starts at age_years * 12. The previous
        # test used `age_months % age_years`, which divided by zero for
        # infants (age_years == 0) and accepted pairs such as 2 years /
        # 50 months.
        first_month = age_years * 12
        if not first_month <= age_months < first_month + 12:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
def test_record_checks():
    """Check how problems raised by record checks are reported.

    Two record checks are registered: one raises a bare ``RecordError``
    and the other raises it with a custom code and message. The assertions
    pin the row, code, message and record of every reported problem.
    """
    validator = CSVValidator(('foo', 'bar'))

    def foo_gt_bar(r):
        # bare RecordError: reported with the default code and message
        if int(r['foo']) < int(r['bar']):
            raise RecordError

    def foo_gt_2bar(r):
        if int(r['foo']) < 2 * int(r['bar']):
            raise RecordError('X4', 'custom message')

    validator.add_record_check(foo_gt_bar)
    validator.add_record_check(foo_gt_2bar)

    rows = (
        ('foo', 'bar'),
        ('7', '3'),  # valid
        ('5', '3'),  # invalid - not foo_gt_2bar
        ('1', '3'),  # invalid - both predicates false
    )
    problems = validator.validate(rows)
    assert len(problems) == 3, len(problems)

    def on_row(row):
        """Return the reported problems belonging to the given row."""
        return [p for p in problems if p['row'] == row]

    def with_code(candidates, code):
        """Return the candidates carrying the given problem code."""
        return [p for p in candidates if p['code'] == code]

    # row 3 only violates the second check
    third = on_row(3)
    assert len(third) == 1
    only = third[0]
    assert only['code'] == 'X4'
    assert only['message'] == 'custom message'
    assert only['record'] == ('5', '3')

    # row 4 violates both checks
    fourth = on_row(4)
    assert len(fourth) == 2

    defaults = with_code(fourth, RECORD_CHECK_FAILED)
    assert len(defaults) == 1
    default = defaults[0]
    assert default['message'] == MESSAGES[RECORD_CHECK_FAILED]
    assert default['record'] == ('1', '3')

    customs = with_code(fourth, 'X4')
    assert len(customs) == 1
    custom = customs[0]
    assert custom['message'] == 'custom message'
    assert custom['record'] == ('1', '3')
def test_record_checks():
    """Exercise record checks that raise RecordError with and without args.

    The first check raises ``RecordError`` bare, the second supplies a
    custom code and message; the test asserts what the validator reports
    for a valid row, a row failing one check and a row failing both.
    """
    columns = ('foo', 'bar')
    validator = CSVValidator(columns)

    def foo_gt_bar(r):
        foo, bar = int(r['foo']), int(r['bar'])
        if foo < bar:
            raise RecordError  # use default code and message

    validator.add_record_check(foo_gt_bar)

    def foo_gt_2bar(r):
        foo, bar = int(r['foo']), int(r['bar'])
        if foo < 2 * bar:
            raise RecordError('X4', 'custom message')

    validator.add_record_check(foo_gt_2bar)

    table = (
        ('foo', 'bar'),
        ('7', '3'),  # valid
        ('5', '3'),  # invalid - not foo_gt_2bar
        ('1', '3'),  # invalid - both predicates false
    )

    problems = validator.validate(table)
    total = len(problems)
    assert total == 3, total

    # bucket the problems by the row they were reported on
    by_row = {}
    for problem in problems:
        by_row.setdefault(problem['row'], []).append(problem)

    row3 = by_row.get(3, [])
    assert len(row3) == 1
    found = row3[0]
    assert found['code'] == 'X4'
    assert found['message'] == 'custom message'
    assert found['record'] == ('5', '3')

    row4 = by_row.get(4, [])
    assert len(row4) == 2

    row4_default = list(
        filter(lambda p: p['code'] == RECORD_CHECK_FAILED, row4))
    assert len(row4_default) == 1
    found = row4_default[0]
    assert found['message'] == MESSAGES[RECORD_CHECK_FAILED]
    assert found['record'] == ('1', '3')

    row4_custom = list(filter(lambda p: p['code'] == 'X4', row4))
    assert len(row4_custom) == 1
    found = row4_custom[0]
    assert found['message'] == 'custom message'
    assert found['record'] == ('1', '3')
def create_validator():
    """Create an example CSV validator for patient demographic data.

    Registers, in order: a header check, a record length check, value
    checks for the id, gender, age and date fields, and a record check
    that requires ``age_months`` to be consistent with ``age_years``.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check(
        'study_id', int, 'EX3', 'study id must be an integer')
    validator.add_value_check(
        'patient_id', int, 'EX4', 'patient id must be an integer')
    validator.add_value_check(
        'gender', enumeration('M', 'F'), 'EX5', 'invalid gender')
    validator.add_value_check(
        'age_years', number_range_inclusive(0, 120, int),
        'EX6', 'invalid age in years')
    validator.add_value_check(
        'date_inclusion', datetime_string('%Y-%m-%d'), 'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The total age in months must lie in [age_years * 12,
        # age_years * 12 + 12). The earlier `age_months % age_years < 12`
        # test raised ZeroDivisionError when age_years was 0 and let
        # through mismatches whenever the remainder happened to be small.
        window_start = age_years * 12
        valid = window_start <= age_months < window_start + 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
def test_exception_handling():
    """Pin down what happens when user-supplied callables raise.

    Every kind of hook registered below raises a plain ``Exception``. The
    test validates the same data twice: once with
    ``report_unexpected_exceptions=False`` and once with the default
    setting, and asserts the number and content of the reported problems.
    """
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('foo', int)

    # -- callables that always blow up -----------------------------------

    def buggy_value_check(v):
        """I am a buggy value check."""
        raise Exception('something went wrong')

    def buggy_value_predicate(v):
        """I am a buggy value predicate."""
        raise Exception('something went wrong')

    def buggy_record_check(r):
        """I am a buggy record check."""
        raise Exception('something went wrong')

    def buggy_record_predicate(r):
        """I am a buggy record predicate."""
        raise Exception('something went wrong')

    def buggy_assert(r):
        """I am a buggy assert."""
        raise Exception('something went wrong')

    def buggy_check(r):
        """I am a buggy check."""
        raise Exception('something went wrong')

    def buggy_each(r):
        """I am a buggy each."""
        raise Exception('something went wrong')

    def buggy_finally_assert():
        """I am a buggy finally assert."""
        raise Exception('something went wrong')

    def buggy_skip(record):
        """I am a buggy skip."""
        raise Exception('something went wrong')

    # -- registration, in the same order as the definitions --------------

    validator.add_value_check('bar', buggy_value_check)
    validator.add_value_predicate('bar', buggy_value_predicate)
    validator.add_record_check(buggy_record_check)
    validator.add_record_predicate(buggy_record_predicate)
    validator.assert_something_buggy = buggy_assert
    validator.check_something_buggy = buggy_check
    validator.each_something_buggy = buggy_each
    validator.finally_assert_something_buggy = buggy_finally_assert
    validator.add_skip(buggy_skip)

    rows = (
        ('foo', 'bar'),
        ('ab', '56'),
    )

    # with exception reporting switched off a single problem is expected
    quiet = validator.validate(rows, report_unexpected_exceptions=False)
    assert len(quiet) == 1, len(quiet)
    assert quiet[0]['row'] == 2

    # by default, exceptions are reported as problems
    reported = validator.validate(rows)
    assert len(reported) == 10, len(reported)

    unexpected = [p for p in reported if p['code'] == UNEXPECTED_EXCEPTION]
    assert len(unexpected) == 9
    for problem in unexpected:
        exc = problem['exception']
        assert exc.args[0] == 'something went wrong', exc.args
def test_exception_handling():
    """Establish expectations for exceptions raised inside validator hooks.

    Each hook attached to the validator raises ``Exception`` with the same
    message. The assertions cover both settings of
    ``report_unexpected_exceptions`` and verify that each reported
    exception carries that message.
    """
    failure = 'something went wrong'

    header = ('foo', 'bar')
    validator = CSVValidator(header)
    validator.add_value_check('foo', int)

    def buggy_value_check(v):
        """I am a buggy value check."""
        raise Exception(failure)
    validator.add_value_check('bar', buggy_value_check)

    def buggy_value_predicate(v):
        """I am a buggy value predicate."""
        raise Exception(failure)
    validator.add_value_predicate('bar', buggy_value_predicate)

    def buggy_record_check(r):
        """I am a buggy record check."""
        raise Exception(failure)
    validator.add_record_check(buggy_record_check)

    def buggy_record_predicate(r):
        """I am a buggy record predicate."""
        raise Exception(failure)
    validator.add_record_predicate(buggy_record_predicate)

    def buggy_assert(r):
        """I am a buggy assert."""
        raise Exception(failure)
    validator.assert_something_buggy = buggy_assert

    def buggy_check(r):
        """I am a buggy check."""
        raise Exception(failure)
    validator.check_something_buggy = buggy_check

    def buggy_each(r):
        """I am a buggy each."""
        raise Exception(failure)
    validator.each_something_buggy = buggy_each

    def buggy_finally_assert():
        """I am a buggy finally assert."""
        raise Exception(failure)
    validator.finally_assert_something_buggy = buggy_finally_assert

    def buggy_skip(record):
        """I am a buggy skip."""
        raise Exception(failure)
    validator.add_skip(buggy_skip)

    data = (header, ('ab', '56'))

    # exceptions suppressed: one problem, located on row 2
    suppressed = validator.validate(data, report_unexpected_exceptions=False)
    count = len(suppressed)
    assert count == 1, count
    first = suppressed[0]
    assert first['row'] == 2

    # by default, exceptions are reported as problems
    everything = validator.validate(data)
    count = len(everything)
    assert count == 10, count

    raised = []
    for entry in everything:
        if entry['code'] == UNEXPECTED_EXCEPTION:
            raised.append(entry)
    assert len(raised) == 9

    for entry in raised:
        error = entry['exception']
        assert error.args[0] == 'something went wrong', error.args
def create_validator():
    """Create an example CSV validator for customer contact data.

    Registers a header check, a record length check and one value check
    (or predicate) per column of the customer file.

    Returns:
        The configured ``CSVValidator`` instance.
    """

    def check_alpha(s=''):
        """Return True if *s* is empty or consists of letters and spaces."""
        return not s or s.replace(" ", "").isalpha()

    field_names = (
        'CUSTID',
        'FIRSTNAME',
        'LASTNAME',
        'CUSTNM',
        'ADDRESS1',
        'ADDRESS2',
        'POSTCODE',
        'CITY',
        'STATE',
        'WORKPHONE',
        'WORKFAX',
        'PHONE',
        'MOBILE',
        'EMAIL',
        'ORGANISATION',
        'EOL',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('CUSTID', int,
                              'EX3', 'CUSTID must be an integer')
    # check_alpha reports failure by returning False rather than raising
    # ValueError, so it has to be registered as a predicate; as a value
    # check its return value was ignored and the check could never fail.
    validator.add_value_predicate('FIRSTNAME', check_alpha,
                                  'EX4', 'FIRSTNAME must be alphabetic')
    validator.add_value_check('LASTNAME', str, 'EX5', 'invalid LASTNAME')
    validator.add_value_check('CUSTNM', str, 'EX6', 'invalid CUSTNM')
    validator.add_value_check('ADDRESS1', str, 'EX7', 'invalid ADDRESS1')
    validator.add_value_check('ADDRESS2', str, 'EX8', 'invalid ADDRESS2')
    validator.add_value_check('POSTCODE', int, 'EX9', 'invalid POSTCODE')
    validator.add_value_check('CITY', str, 'EX10', 'invalid CITY')
    validator.add_value_check('STATE', str, 'EX11', 'invalid STATE')
    validator.add_value_check('WORKPHONE', int, 'EX12', 'invalid WORKPHONE')
    validator.add_value_check('WORKFAX', int, 'EX13', 'invalid WORKFAX')
    validator.add_value_check('PHONE', int, 'EX14', 'invalid PHONE')
    validator.add_value_check('MOBILE', int, 'EX15', 'invalid MOBILE')
    validator.add_value_check('EMAIL', str, 'EX16', 'invalid EMAIL')
    validator.add_value_check('ORGANISATION', str,
                              'EX17', 'invalid ORGANISATION')
    validator.add_value_check('EOL', str, 'EX18', 'invalid EOL')

    # No cross-field record check is registered. The previous one applied
    # age arithmetic to int(CUSTNM) and int(ADDRESS1): it raised on any
    # non-numeric name or address, divided by zero when CUSTNM was 0, and
    # reused code EX8, which already belongs to the ADDRESS2 value check.

    return validator