def test_header_check():
    """Verify that header checks flag a header row that does not match.

    Two header checks are registered on the same validator, one relying on
    the default code/message and one supplying its own, and each is
    expected to report the mismatching first row.
    """
    validator = CSVValidator(('foo', 'bar'))
    validator.add_header_check()  # default code and message
    validator.add_header_check(code='X1',
                               message='custom message')  # custom code and message

    bad_header = ('foo', 'baz')
    rows = (
        bad_header,
        ('123', '456'),
    )
    problems = validator.validate(rows)
    assert len(problems) == 2

    # problem raised by the check that uses the defaults
    default_problem = problems[0]
    assert default_problem['code'] == HEADER_CHECK_FAILED
    assert default_problem['message'] == MESSAGES[HEADER_CHECK_FAILED]
    assert default_problem['record'] == bad_header
    assert default_problem['missing'] == {'bar'}
    assert default_problem['unexpected'] == {'baz'}
    assert default_problem['row'] == 1

    # problem raised by the check with the custom code and message
    custom_problem = problems[1]
    assert custom_problem['code'] == 'X1'
    assert custom_problem['message'] == 'custom message'
    assert custom_problem['missing'] == {'bar'}
    assert custom_problem['unexpected'] == {'baz'}
    assert custom_problem['record'] == bad_header
    assert custom_problem['row'] == 1
def test_ignore_lines():
    """Verify that ``ignore_lines`` skips leading rows but keeps row numbers.

    The first two rows are skipped, so the bad header is expected to be
    reported on row 3 and both value-check failures on row 4.
    """
    validator = CSVValidator(('foo', 'bar'))
    validator.add_header_check()
    validator.add_value_check('foo', int)
    validator.add_value_check('bar', float)

    rows = (
        ('ignore', 'me', 'please'),
        ('ignore', 'me', 'too', 'please'),
        ('foo', 'baz'),
        ('1.2', 'abc'),
    )
    problems = validator.validate(rows, ignore_lines=2)
    assert len(problems) == 3

    def with_code(code):
        # select the reported problems carrying the given problem code
        return [problem for problem in problems if problem['code'] == code]

    header_problems = with_code(HEADER_CHECK_FAILED)
    assert len(header_problems) == 1
    assert header_problems[0]['row'] == 3

    value_problems = with_code(VALUE_CHECK_FAILED)
    assert len(value_problems) == 2
    assert all(problem['row'] == 4 for problem in value_problems)
def test_ignore_lines():
    """Verify that ``ignore_lines`` skips leading rows but keeps row numbers.

    With two leading rows ignored, the header problem must be reported
    against row 3 and the two value problems against row 4.
    """
    validator = CSVValidator(('foo', 'bar'))
    validator.add_header_check()
    for field_name, converter in (('foo', int), ('bar', float)):
        validator.add_value_check(field_name, converter)

    skipped_rows = (
        ('ignore', 'me', 'please'),
        ('ignore', 'me', 'too', 'please'),
    )
    checked_rows = (
        ('foo', 'baz'),
        ('1.2', 'abc'),
    )
    problems = validator.validate(skipped_rows + checked_rows, ignore_lines=2)
    assert len(problems) == 3

    # exactly one header problem, located on the first non-ignored row
    header_problems = [
        found for found in problems if found['code'] == HEADER_CHECK_FAILED
    ]
    assert len(header_problems) == 1
    assert header_problems[0]['row'] == 3

    # both value checks fail on the data row that follows the header
    value_problems = [
        found for found in problems if found['code'] == VALUE_CHECK_FAILED
    ]
    assert len(value_problems) == 2
    for found in value_problems:
        assert found['row'] == 4
def test_header_check():
    """Verify that a mismatching header row is reported by each header check.

    One check is added with the default code and message and a second with
    a custom pair; both must report the same missing and unexpected fields
    for row 1.
    """
    expected_fields = ('foo', 'bar')
    validator = CSVValidator(expected_fields)
    validator.add_header_check()  # falls back to the default code and message
    validator.add_header_check(code='X1', message='custom message')

    header_row = ('foo', 'baz')
    problems = validator.validate((header_row, ('123', '456')))
    assert len(problems) == 2

    missing = {'bar'}
    unexpected = {'baz'}

    first = problems[0]
    assert first['code'] == HEADER_CHECK_FAILED
    assert first['message'] == MESSAGES[HEADER_CHECK_FAILED]
    assert first['record'] == header_row
    assert first['missing'] == missing
    assert first['unexpected'] == unexpected
    assert first['row'] == 1

    second = problems[1]
    assert second['code'] == 'X1'
    assert second['message'] == 'custom message'
    assert second['missing'] == missing
    assert second['unexpected'] == unexpected
    assert second['record'] == header_row
    assert second['row'] == 1
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator expects the columns ``study_id``, ``patient_id``,
    ``gender``, ``age_years``, ``age_months`` and ``date_inclusion``. It
    registers a header check (EX1), a record length check (EX2), value
    checks on individual fields (EX3-EX7) and a record check that compares
    the two age fields (EX8).

    :returns: the configured ``CSVValidator``
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` unless age_months is consistent with age_years."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # age_months must fall inside the 12-month window starting at
        # age_years * 12.  Comparing against the window bounds, instead of
        # taking age_months modulo age_years, rejects values such as
        # 5 years / 100 months and no longer divides by zero for infants
        # whose age_years is 0.
        valid = age_years * 12 <= age_months < (age_years + 1) * 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
def _validate_csv(csv_file, output_file=None):
    """
    Validates a CSV file.

    The expected field names are obtained from ``_get_header(csv_file)``;
    a header check ('EX1') and a record length check ('EX2') are then run
    over every row read from the file.

    :param csv_file: The path of the CSV file to validate
    :param output_file: Optional destination handed to ``write_problems``;
        ``sys.stdout`` is used when this is falsy
    :returns: True if no problems were found, False otherwise
    """
    validator = CSVValidator(_get_header(csv_file))

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    with open(csv_file) as handle:
        problems = validator.validate(csv.reader(handle))

    if not problems:
        return True

    destination = output_file or sys.stdout
    write_problems(problems, destination)
    return False
def generate(self):
    """Build a ``CSVValidator`` from the checks configured on this object.

    The validator is created for ``self.field_names`` and receives a header
    check, a record length check, one value check per ``(field, check)``
    pair in ``self.value_checks`` and a unique check on
    ``self.unique_checks``.

    :returns: the configured ``CSVValidator``
    """
    built = CSVValidator(self.field_names)

    # structural checks that use the library's default codes and messages
    built.add_header_check()
    built.add_record_length_check()

    # per-field checks supplied by the caller
    for field_name, field_check in self.value_checks:
        built.add_value_check(field_name, field_check)

    built.add_unique_check(self.unique_checks)
    return built
def test_response_contents(self, register_fields, endpoint):
    """Check that the header of ``blobs.csv`` matches the register fields.

    The CSV is fetched from ``endpoint`` and its header row is validated
    against ``'_id'`` followed by ``register_fields``.
    """
    csv_url = urljoin(endpoint, 'blobs.csv')
    response = requests.get(csv_url)

    expected_header = ['_id'] + register_fields
    validator = CSVValidator(expected_header)
    validator.add_header_check()

    # the response body is split on CRLF before being handed to csv.reader
    body_lines = response.text.split('\r\n')
    problems = validator.validate(csv.reader(body_lines))

    assert problems == [], '/blobs CSV fields do not match the register definition'
def test_response_contents(self, register_fields, endpoint):
    """Check that ``blobs.csv`` exposes exactly the register's fields.

    Downloads ``blobs.csv`` relative to ``endpoint`` and runs a header
    check expecting ``'_id'`` plus every name in ``register_fields``.
    """
    response = requests.get(urljoin(endpoint, 'blobs.csv'))

    validator = CSVValidator(['_id'] + register_fields)
    validator.add_header_check()

    # rows are separated by CRLF in the response text
    reader = csv.reader(response.text.split('\r\n'))
    found_problems = validator.validate(reader)

    failure_message = '/blobs CSV fields do not match the register definition'
    assert found_problems == [], failure_message
def validate_csv(csv):
    """Validate medicine rows and return the problems that were found.

    Registers a header check plus value checks on ``provider_id`` (must
    convert with ``int``) and ``kind`` (must be one of the first elements
    of the entries in ``Medicine.KIND``).

    :param csv: the rows to validate, passed straight to
        ``CSVValidator.validate``
    :returns: whatever ``CSVValidator.validate`` returns for ``csv``
    """
    # first element of every Medicine.KIND entry is an accepted kind value
    allowed_kinds = [entry[0] for entry in Medicine.KIND]

    validator = CSVValidator(
        ("provider_id", "kind", "name", "dosage", "measure", "amount")
    )
    validator.add_header_check("HEADER", "bad header")
    validator.add_value_check(
        "provider_id",
        int,
        "PROVIDER",
        "provider_id must be an integer",
    )
    # NOTE(review): the kind check reuses the "PROVIDER" problem code, so the
    # two value checks can only be told apart by message -- confirm intended.
    validator.add_value_check(
        "kind",
        enumeration(*allowed_kinds),
        "PROVIDER",
        f"kind must be {str(allowed_kinds)}",
    )

    problems = validator.validate(csv)
    return problems
def create_validator():
    """Create an example CSV validator for patient demographic data.

    Expected columns, in order: ``study_id``, ``patient_id``, ``gender``,
    ``age_years``, ``age_months``, ``date_inclusion``.  The validator gets
    a header check (EX1), a record length check (EX2), value checks for
    single fields (EX3-EX7) and a record check relating the two age fields
    (EX8).

    :returns: the configured ``CSVValidator``
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` when age_months does not match age_years."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The months value has to lie in [age_years * 12, age_years * 12 + 12).
        # The earlier ``age_months % age_years < 12`` test divided by zero
        # when age_years was 0 and let through mismatches such as
        # 5 years / 100 months, so the window is checked directly.
        lower_bound = age_years * 12
        valid = lower_bound <= age_months < lower_bound + 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
def create_validator():
    """Create a CSV validator for customer contact records.

    Expected columns, in order: CUSTID, FIRSTNAME, LASTNAME, CUSTNM,
    ADDRESS1, ADDRESS2, POSTCODE, CITY, STATE, WORKPHONE, WORKFAX, PHONE,
    MOBILE, EMAIL, ORGANISATION, EOL.  The validator gets a header check
    (EX1), a record length check (EX2) and one value check per column
    (EX3-EX18).

    :returns: the configured ``CSVValidator``
    """
    def check_alpha(value=''):
        """Raise ``ValueError`` unless *value* is empty or letters and spaces only."""
        # The other value checks used here (e.g. ``int``) report a bad value
        # by raising ValueError; returning False, as this helper used to do,
        # is not a failure signal, so the FIRSTNAME check never fired.
        if value and not value.replace(" ", "").isalpha():
            raise ValueError('not alphabetic: %r' % (value,))

    field_names = (
        'CUSTID',
        'FIRSTNAME',
        'LASTNAME',
        'CUSTNM',
        'ADDRESS1',
        'ADDRESS2',
        'POSTCODE',
        'CITY',
        'STATE',
        'WORKPHONE',
        'WORKFAX',
        'PHONE',
        'MOBILE',
        'EMAIL',
        'ORGANISATION',
        'EOL',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # one value check per column: (field, check, problem code, message)
    value_checks = (
        ('CUSTID', int, 'EX3', 'CUSTID must be an integer'),
        # message previously claimed FIRSTNAME "must be an integer"
        ('FIRSTNAME', check_alpha, 'EX4', 'FIRSTNAME must be alphabetic'),
        ('LASTNAME', str, 'EX5', 'invalid LASTNAME'),
        ('CUSTNM', str, 'EX6', 'invalid CUSTNM'),
        ('ADDRESS1', str, 'EX7', 'invalid ADDRESS1'),
        ('ADDRESS2', str, 'EX8', 'invalid ADDRESS2'),
        ('POSTCODE', int, 'EX9', 'invalid POSTCODE'),
        ('CITY', str, 'EX10', 'invalid CITY'),
        ('STATE', str, 'EX11', 'invalid STATE'),
        ('WORKPHONE', int, 'EX12', 'invalid WORKPHONE'),
        # message previously misspelled the field as "WORKFAC"
        ('WORKFAX', int, 'EX13', 'invalid WORKFAX'),
        ('PHONE', int, 'EX14', 'invalid PHONE'),
        ('MOBILE', int, 'EX15', 'invalid MOBILE'),
        ('EMAIL', str, 'EX16', 'invalid EMAIL'),
        ('ORGANISATION', str, 'EX17', 'invalid ORGANISATION'),
        ('EOL', str, 'EX18', 'invalid EOL'),
    )
    for field_name, check, code, message in value_checks:
        validator.add_value_check(field_name, check, code, message)

    # The ``check_age_variables`` record check carried over from the patient
    # example has been removed: it called int() on CUSTNM and ADDRESS1 as if
    # they were ages in years and months, and reported under code 'EX8',
    # which is already used by the ADDRESS2 value check.

    return validator