def test_unique_validator_fails(fields, row, unique_with, exception, bad):
    """Duplicate fields must raise `exception` and record the offenders in `.bad`."""
    uv = UniqueValidator(unique_with=unique_with)
    with pytest.raises(exception):
        for item in fields:
            uv.validate(item, row)
    # The validator keeps the set of offending values for error reporting.
    assert uv.bad == bad
class YourFirstNonCommaDelimitedValidator(Vlad):
    """Validate the pipe-delimited ``bats.csv`` example file."""

    source = LocalFile("bats.csv")
    # Override the default comma separator: this file is pipe-delimited.
    delimiter = "|"
    validators = {
        "Column A": [UniqueValidator()],
        "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
    }
def test_initialize_vlad():
    """A Vlad built against the example vampires file validates cleanly."""
    checks = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
    }
    vlad = Vlad(
        source=LocalFile('vladiate/examples/vampires.csv'),
        validators=checks,
    )
    assert vlad.validate()
def test_initialize_vlad():
    """A freshly constructed Vlad passes validation on the example data."""
    example = LocalFile("vladiate/examples/vampires.csv")
    result = Vlad(
        source=example,
        validators={
            "Column A": [UniqueValidator()],
            "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
        },
    ).validate()
    assert result
class Validator(Vlad):
    """Validate ``repos.csv``: contact/DOI/funder/licence metadata per repository.

    Every column except ``url`` tolerates empty cells (``empty_ok=True``);
    ``url`` must be present and unique across rows.
    """

    # Licences accepted for the ``licence`` column (SPDX-style identifiers).
    _ALLOWED_LICENCES = [
        "Apache-2.0",
        "Artistic-2.0",
        "BSD-2-Clause",
        "BSD-3-Clause",
        "CECILL-2.1",
        "GPL-2.0",
        "GPL-3.0",
        "MIT",
        "MPL-2.0",
        "NCSA",
    ]

    source = LocalFile("repos.csv")
    validators = {
        # "Name <email@host>" style contact string.
        "contact": [
            RegexValidator(r"\w[\w\-' ]+ <[\w\-.]+@[\w\-.]+>", full=True, empty_ok=True)
        ],
        "doi": [RegexValidator(r"[\w\-./]+", full=True, empty_ok=True)],
        # Semicolon-separated numeric funder ids.
        "funders": [RegexValidator(r"(\d+;?)+", full=True, empty_ok=True)],
        "homepage_url": [RegexValidator(r"https?://.+", full=True, empty_ok=True)],
        "licence": [SetValidator(_ALLOWED_LICENCES, empty_ok=True)],
        "organisations": [SetValidator(["grid.457348.9", "grid.4991.5"], empty_ok=True)],
        # Year-month stamp, e.g. "2020-07".
        "rsotm": [RegexValidator(r"\d{4}-\d{2}", full=True, empty_ok=True)],
        "url": [UniqueValidator()],
    }
def test_initialize_vlad_with_alternative_delimiter():
    """Vlad accepts a non-comma delimiter and still validates the file."""
    pipe = '|'
    checks = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
    }
    subject = Vlad(
        source=LocalFile('vladiate/examples/bats.csv'),
        validators=checks,
        delimiter=pipe,
    )
    assert subject.validate()
def test_unused_validator_fails_validation():
    """A validator for a column absent from the file makes validation fail."""
    checks = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
        # 'Column C' does not exist in vampires.csv, so this entry is unused.
        'Column C': [FloatValidator()],
    }
    vlad = Vlad(
        source=LocalFile('vladiate/examples/vampires.csv'),
        validators=checks,
    )
    assert not vlad.validate()
def test_unused_validator_fails_validation():
    """Validation must fail when a configured column never appears in the data."""
    example = LocalFile("vladiate/examples/vampires.csv")
    outcome = Vlad(
        source=example,
        validators={
            "Column A": [UniqueValidator()],
            "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
            # No "Column C" in the file — the unused validator should be reported.
            "Column C": [FloatValidator()],
        },
    ).validate()
    assert not outcome
def test_initialize_vlad_with_alternative_delimiter():
    """Passing delimiter='|' lets Vlad parse the pipe-separated bats file."""
    example = LocalFile("vladiate/examples/bats.csv")
    outcome = Vlad(
        source=example,
        validators={
            "Column A": [UniqueValidator()],
            "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
        },
        delimiter="|",
    ).validate()
    assert outcome
class YourFirstValidator(Vlad):
    """Minimal example: unique Column A, Column B limited to two values."""

    source = LocalFile('vampires.csv')
    validators = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
    }
class YourFirstNonCommaDelimitedValidator(Vlad):
    """Same checks as the comma-delimited example, but for a '|'-separated file."""

    source = LocalFile('bats.csv')
    # bats.csv uses '|' instead of the default comma.
    delimiter = '|'
    validators = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
    }
# We could do the validation directioly in the first read block below # But for now let's add an additional read through the data # Annoyingly, you have to instantiate that "validators" block for each record # Otherwise they seem to accumulate extra values. # Beyond the 2 new validators defined above, we're using vladiate defined options allgood = True badfiles = [] for sfn in os.listdir(args.indir + args.sub): if sfn not in skip and sfn.endswith(".tsv"): validators = { 'docId': [ UniqueValidator(unique_with=['annotSet', 'annotId']), #UniqueValidator(unique_with=['annotSet', 'startOffset']) ], 'annotId': [RegexValidator(pattern=r'T?\d*-?\d+', full=True)], 'annotType': [ SetValidator([ 'Quantity', 'Qualifier', 'MeasuredProperty', 'MeasuredEntity' ]) ], 'annotSet': [IntValidator()], 'startOffset': [IntValidator()], 'endOffset': [IntValidator()], 'other': [JsonValidator(empty_ok=True)], 'text': [LengthValidator()] }
def test_unique_validator_works(fields, row, unique_with):
    """All-unique fields pass through UniqueValidator without raising."""
    checker = UniqueValidator(unique_with=unique_with)
    for value in fields:
        checker.validate(value, row)
class TestVlad(Vlad):
    """Fixture Vlad subclass with uniqueness and set-membership checks."""

    validators = {
        'Column A': [UniqueValidator()],
        'Column B': [SetValidator(['Vampire', 'Not A Vampire'])],
    }
class TestVlad(Vlad):
    """Fixture Vlad subclass with a single uniqueness check on Column A."""

    validators = {
        'Column A': [UniqueValidator()],
    }
class TestVlad(Vlad):
    """Fixture Vlad subclass: Column A unique, Column B from a fixed set."""

    validators = {
        "Column A": [UniqueValidator()],
        "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
    }
class TestVlad(Vlad):
    """Fixture Vlad subclass checking only that Column A values are unique."""

    validators = {
        "Column A": [UniqueValidator()],
    }
class YourFirstValidator(Vlad):
    """Introductory example: validate vampires.csv with two column checks."""

    source = LocalFile("vampires.csv")
    validators = {
        # Column A values must never repeat.
        "Column A": [UniqueValidator()],
        # Column B is restricted to exactly these two labels.
        "Column B": [SetValidator(["Vampire", "Not A Vampire"])],
    }
class WeatherValidator(Vlad):
    """Validate a TOA5 daily weather logger file (SPER_CR6_Daily.dat).

    Numeric columns are range-checked against per-column bounds read from
    ``columns`` (defined elsewhere in this script): ``columns[1][i]`` is
    presumably the minimum and ``columns[2][i]`` the maximum for column
    index ``i`` — TODO confirm against where ``columns`` is built.
    Timestamp-like columns are matched against a shared datetime regex.
    """

    # "YYYY-MM-DD HH:MM:SS" pattern shared by all time-of-extreme columns.
    # (The original file repeated this literal six times, with one copy
    # broken across a line; it is reconstructed and deduplicated here.)
    _TS_PATTERN = (
        "((19|20)\\d\\d)-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) "
        "([2][0-3]|[0-1][0-9]|[1-9]):[0-5][0-9]:([0-5][0-9]|[6][0])$"
    )

    source = TOA5Input('SPER_CR6_Daily.dat')
    validators = {
        'TIMESTAMP': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
        # Record numbers must be present and unique.
        'RECORD': [UniqueValidator(empty_ok=False)],
        'BattV_Min': [RangeValidator(low=float(columns[1][2]), high=float(columns[2][2]))],
        'WS_ms_Avg': [RangeValidator(low=float(columns[1][3]), high=float(columns[2][3]))],
        'WS_ms_Max': [RangeValidator(low=float(columns[1][4]), high=float(columns[2][4]))],
        'WS_ms_TMx': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
        'WS_ms_S_WVT': [RangeValidator(low=float(columns[1][6]), high=float(columns[2][6]))],
        # NOTE(review): the next two entries use different low/high column
        # indices (7 vs 8, and 8 vs 9) — looks like an off-by-one, but kept
        # as-is; confirm against the TOA5 header layout before changing.
        'WindDir_D1_WVT': [RangeValidator(low=float(columns[1][7]), high=float(columns[2][8]))],
        'WindDir_SD1_WVT': [RangeValidator(low=float(columns[1][8]), high=float(columns[2][9]))],
        'SlrMJ_Tot': [RangeValidator(low=float(columns[1][10]), high=float(columns[2][10]))],
        'Rain_mm_Tot': [RangeValidator(low=float(columns[1][11]), high=float(columns[2][11]))],
        'AirTC_Avg': [RangeValidator(low=float(columns[1][12]), high=float(columns[2][12]))],
        'AirTC_Max': [RangeValidator(low=float(columns[1][13]), high=float(columns[2][13]))],
        'AirTC_TMx': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
        'AirTC_Min': [RangeValidator(low=float(columns[1][14]), high=float(columns[2][14]))],
        'AirTC_TMn': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
        'RH_Max': [RangeValidator(low=float(columns[1][16]), high=float(columns[2][16]))],
        'RH_TMx': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
        'RH_Min': [RangeValidator(low=float(columns[1][18]), high=float(columns[2][18]))],
        'RH_TMn': [RegexValidator(pattern=_TS_PATTERN, empty_ok=False)],
    }
def test_unique_validator_supports_empty_ok(fields, row, unique_with):
    """With empty_ok=True, UniqueValidator accepts every field without raising."""
    validator = UniqueValidator(empty_ok=True, unique_with=unique_with)
    for entry in fields:
        validator.validate(entry, row)