Exemplo n.º 1
0
class Validator(Vlad):
    """Vladiate schema describing the columns of ``repos.csv``.

    Every column except ``url`` is declared with ``empty_ok=True``; ``url``
    is checked with a ``UniqueValidator``.
    """

    source = LocalFile("repos.csv")
    validators = {
        # "Name <address>" form, e.g. "Jane O'Neil <jane.oneil@example.org>".
        "contact": [
            RegexValidator(r"\w[\w\-' ]+ <[\w\-.]+@[\w\-.]+>",
                           full=True,
                           empty_ok=True)
        ],
        "doi": [RegexValidator(r"[\w\-./]+", full=True, empty_ok=True)],
        # One or more digit groups separated by single semicolons, with an
        # optional trailing semicolon.  This accepts exactly the same strings
        # as the former "(\d+;?)+" but has no nested quantifier, so a long
        # digit run followed by an invalid character can no longer trigger
        # exponential backtracking in the regex engine.
        "funders": [
            RegexValidator(r"\d+(?:;\d+)*;?", full=True, empty_ok=True)
        ],
        "homepage_url":
        [RegexValidator(r"https?://.+", full=True, empty_ok=True)],
        # Accepted licence identifiers.
        "licence": [
            SetValidator(
                [
                    "Apache-2.0",
                    "Artistic-2.0",
                    "BSD-2-Clause",
                    "BSD-3-Clause",
                    "CECILL-2.1",
                    "GPL-2.0",
                    "GPL-3.0",
                    "MIT",
                    "MPL-2.0",
                    "NCSA",
                ],
                empty_ok=True,
            )
        ],
        "organisations":
        [SetValidator(["grid.457348.9", "grid.4991.5"], empty_ok=True)],
        # Four digits, a dash, two digits (presumably YYYY-MM; confirm).
        "rsotm": [RegexValidator(r"\d{4}-\d{2}", full=True, empty_ok=True)],
        "url": [UniqueValidator()],
    }
Exemplo n.º 2
0
# For now, add an additional read-through of the data.
# Annoyingly, the "validators" block has to be instantiated afresh for each
# file; otherwise the validators seem to accumulate extra values.

# Apart from the 2 new validators defined above, everything used here is one
# of the validators that vladiate itself provides.

# Overall pass/fail flag and list of failing files.  Both are only
# initialised here; presumably they are updated further down, outside this
# excerpt.
allgood = True
badfiles = []
# Visit every entry of the directory args.indir + args.sub.
for sfn in os.listdir(args.indir + args.sub):
    # Only .tsv files that are not listed in `skip` get validated.
    if sfn not in skip and sfn.endswith(".tsv"):
        # A brand-new set of validator objects is built on every iteration
        # instead of being shared between files.
        validators = {
            'docId': [
                # docId must be unique in combination with annotSet and annotId.
                UniqueValidator(unique_with=['annotSet', 'annotId']),
                # Disabled alternative uniqueness rule:
                #UniqueValidator(unique_with=['annotSet', 'startOffset'])
            ],
            # Optional "T", optional digits, optional "-", then digits.
            'annotId': [RegexValidator(pattern=r'T?\d*-?\d+', full=True)],
            'annotType': [
                SetValidator([
                    'Quantity', 'Qualifier', 'MeasuredProperty',
                    'MeasuredEntity'
                ])
            ],
            'annotSet': [IntValidator()],
            'startOffset': [IntValidator()],
            'endOffset': [IntValidator()],
            # JsonValidator and LengthValidator are presumably the two custom
            # validators defined elsewhere in this file -- TODO confirm.
            'other': [JsonValidator(empty_ok=True)],
            'text': [LengthValidator()]
        }
        #print(sfn)
        # Bind the file and the fresh validators into a Vlad instance (the
        # call continues beyond this excerpt).
        truth = Vlad(source=LocalFile(args.indir + args.sub + sfn),
                     validators=validators,
Exemplo n.º 3
0
def test_regex_validator_works(pattern, field):
    """Check that a RegexValidator built from ``pattern`` accepts ``field``.

    The test passes when ``validate`` returns without raising.  ``pattern``
    and ``field`` are presumably injected by a parametrize decorator that is
    not visible here.
    """
    accepting = RegexValidator(pattern)
    accepting.validate(field)
Exemplo n.º 4
0
def test_regex_validator_fails(pattern, field):
    """Check that a RegexValidator built from ``pattern`` rejects ``field``.

    Validation must raise ``ValidationException`` and the rejected value must
    afterwards be the only entry in the validator's ``bad`` collection.
    ``pattern`` and ``field`` are presumably injected by a parametrize
    decorator that is not visible here.
    """
    rejecting = RegexValidator(pattern)

    with pytest.raises(ValidationException):
        rejecting.validate(field)

    only_field = {field}
    assert rejecting.bad == only_field
Exemplo n.º 5
0
# Regular expression shared by every date/time column below: a 19xx/20xx
# year, month 1-12 and day 1-31 (leading zero optional), a space, an hour
# (00-23, or a single digit 1-9), minutes 00-59 and seconds 00-60.
_TIMESTAMP_PATTERN = "((19|20)\\d\\d)-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) ([2][0-3]|[0-1][0-9]|[1-9]):[0-5][0-9]:([0-5][0-9]|[6][0])$"


def _timestamp_check():
    """Build a new one-element validator list for a mandatory timestamp column."""
    return [RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)]


def _limits_check(low_col, high_col=None):
    """Build a new one-element validator list holding a RangeValidator.

    The lower bound is read from ``columns[1][low_col]`` and the upper bound
    from ``columns[2][high_col]``; ``high_col`` defaults to ``low_col``.
    ``columns`` is defined outside this block.
    """
    if high_col is None:
        high_col = low_col
    lower = float(columns[1][low_col])
    upper = float(columns[2][high_col])
    return [RangeValidator(low=lower, high=upper)]


class WeatherValidator(Vlad):
    """Vladiate schema for ``SPER_CR6_Daily.dat``, read through TOA5Input.

    Timestamp columns must match ``_TIMESTAMP_PATTERN``; numeric columns are
    range-checked against bounds taken from ``columns``.
    """

    source = TOA5Input('SPER_CR6_Daily.dat')
    validators = {
        'TIMESTAMP': _timestamp_check(),
        'RECORD': [UniqueValidator(empty_ok=False)],
        'BattV_Min': _limits_check(2),
        'WS_ms_Avg': _limits_check(3),
        'WS_ms_Max': _limits_check(4),
        'WS_ms_TMx': _timestamp_check(),
        'WS_ms_S_WVT': _limits_check(6),
        # NOTE(review): the next two entries take their low and high bounds
        # from different indices (7/8 and 8/9), unlike every other column.
        # Kept exactly as found -- confirm which index is intended.
        'WindDir_D1_WVT': _limits_check(7, 8),
        'WindDir_SD1_WVT': _limits_check(8, 9),
        'SlrMJ_Tot': _limits_check(10),
        'Rain_mm_Tot': _limits_check(11),
        'AirTC_Avg': _limits_check(12),
        'AirTC_Max': _limits_check(13),
        'AirTC_TMx': _timestamp_check(),
        'AirTC_Min': _limits_check(14),
        'AirTC_TMn': _timestamp_check(),
        'RH_Max': _limits_check(16),
        'RH_TMx': _timestamp_check(),
        'RH_Min': _limits_check(18),
        'RH_TMn': _timestamp_check(),
    }