Пример #1
0
def test_validator_invalid_boolean():
    """A boolean field holding the string "not true" must fail validation."""
    definition = {"fields": [{"name": "boolean_field", "type": "boolean"}]}
    record = {"boolean_field": "not true"}

    validator = Schema(definition)
    is_valid = validator.validate(record)
    assert not is_valid
Пример #2
0
def test_validator_invalid_string():
    """A string field holding the integer 100 must fail validation."""
    definition = {"fields": [{"name": "string_field", "type": "string"}]}
    record = {"string_field": 100}

    validator = Schema(definition)
    is_valid = validator.validate(record)
    assert not is_valid
Пример #3
0
def test_validator_all_valid_values():
    """
    Validate a record of native Python values against a multi-type schema.

    Note that ``str_null_field`` is declared in the schema (as string or
    nullable) but is deliberately absent from the record.
    """
    field_definitions = [
        {"name": "string_field", "type": "string"},
        {"name": "str_null_field", "type": ["string", "nullable"]},
        {"name": "integer_field", "type": "numeric"},
        {"name": "boolean_field", "type": "boolean"},
        {"name": "date_field", "type": "date"},
        {"name": "other_field", "type": "other"},
        {"name": "nullable_field", "type": "nullable"},
        {"name": "list_field", "type": "list"},
        {"name": "enum_field", "type": "enum", "symbols": ["RED", "GREEN", "BLUE"]},
    ]
    record = {
        "string_field": "string",
        "integer_field": 100,
        "boolean_field": True,
        "date_field": datetime.datetime.today(),
        "other_field": ["abc"],
        "nullable_field": None,
        "list_field": ["a", "b", "c"],
        "enum_field": "RED",
    }

    validator = Schema({"fields": field_definitions})
    # On failure, surface the validator's own explanation as the assert message.
    assert validator.validate(record), validator.last_error
Пример #4
0
def test_validator_cve_format():
    """A "cve" field rejects a free-text name and accepts "CVE-2017-0144"."""
    validator = Schema({"fields": [{"name": "cve", "type": "cve"}]})

    rejected_record = {"cve": "eternalblue"}
    accepted_record = {"cve": "CVE-2017-0144"}

    assert not validator.validate(rejected_record)
    assert validator.validate(accepted_record), validator.last_error
Пример #5
0
def test_validator_list():
    """A "list" field rejects a plain string and accepts a list of strings."""
    validator = Schema({"fields": [{"name": "key", "type": "list"}]})

    rejected_record = {"key": "not a list"}
    accepted_record = {"key": ["is", "a", "list"]}

    assert not validator.validate(rejected_record)
    assert validator.validate(accepted_record)
Пример #6
0
def test_validator_enum():
    """An "enum" field accepts a listed symbol and rejects an unlisted one."""
    enum_field = {"name": "key", "type": "enum", "symbols": ["north", "south"]}
    validator = Schema({"fields": [enum_field]})

    rejected_record = {"key": "left"}
    accepted_record = {"key": "north"}

    assert not validator.validate(rejected_record)
    assert validator.validate(accepted_record)
Пример #7
0
def test_raise_exception():
    """With ``raise_exception=True`` an invalid record raises ValidationError."""
    validator = Schema({"fields": [{"name": "number_field", "type": "numeric"}]})
    record = {"number_field": "one hundred"}

    try:
        validator.validate(record, raise_exception=True)
    except ValidationError:  # pragma: no cover
        raised = True
    else:
        raised = False

    assert raised
Пример #8
0
def test_validator_date():
    """A "date" field rejects malformed date strings and accepts "2020-01-01"."""
    validator = Schema({"fields": [{"name": "key", "type": "date"}]})

    # Not a date at all, digits without separators, and a month of zero.
    for bad_value in ("tomorrow", "2020001001", "2020-00-01"):
        assert not validator.validate({"key": bad_value})

    assert validator.validate({"key": "2020-01-01"})
Пример #9
0
def test_validator_multiple_types():
    """A field typed as string, boolean or nullable accepts a value of each kind."""
    multi_field = {"name": "multi", "type": ["string", "boolean", "nullable"]}
    validator = Schema({"fields": [multi_field]})

    # One value per permitted type: a string, a bool and None.
    for value in ("True", True, None):
        assert validator.validate({"multi": value})
Пример #10
0
def test_call_alias():
    """Calling a Schema instance directly accepts a valid record."""
    validator = Schema({"fields": [{"name": "number_field", "type": "numeric"}]})
    record = {"number_field": 100}

    assert validator(record)
Пример #11
0
def test_validator_invalid_schema():
    """
    Constructing a Schema from a definition without a "fields" list must raise.
    """
    rejected = False
    try:
        Schema({"name": "string"})
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a schema rejection.
    except Exception:  # pragma: no cover
        rejected = True
    assert rejected
Пример #12
0
def test_validator_extended_schema():
    """
    Ensure the validator ignores extra keys in the schema definition, both at
    the top level ("table") and on a field ("description", "last_updated").
    """
    annotated_field = {
        "name": "string_field",
        "type": "string",
        "description": "character array",
        "last_updated": datetime.datetime.today(),
    }
    definition = {
        "table": "this is a test schema",
        "fields": [annotated_field],
    }
    record = {"string_field": "the"}

    validator = Schema(definition)
    assert validator.validate(record)
Пример #13
0
def test_unknown_type():
    """Constructing a Schema with an unrecognised field type must raise ValueError."""
    TEST_SCHEMA = {"fields": [{"name": "key", "type": "not_a_known_type"}]}

    try:
        # Only construction matters here, so the instance is not kept.
        Schema(TEST_SCHEMA)
    except ValueError:  # pragma: no cover
        failed = True
    else:
        failed = False

    assert failed
Пример #14
0
def test_validator_nonnative_types():
    """
    Validate a record whose values are all strings against numeric, boolean,
    date, TIMESTAMP and nullable fields.
    """
    field_definitions = [
        {"name": "integer_field", "type": "numeric"},
        {"name": "boolean_field", "type": "boolean"},
        {"name": "date_field", "type": "date"},
        {"name": "date_field2", "type": "TIMESTAMP"},
        {"name": "nullable_field", "type": "nullable"},
    ]
    record = {
        "integer_field": "100",
        "boolean_field": "True",
        "date_field": "2000-01-01 00:00:00.0000",
        "date_field2": "2022-02-16T23:27:08.892Z",
        "nullable_field": "",
    }

    validator = Schema({"fields": field_definitions})
    # On failure, surface the validator's own explanation as the assert message.
    assert validator.validate(record), validator.last_error
Пример #15
0
def test_validator_invalid_number():
    """A numeric field rejects both a non-numeric string and a function object."""
    # A fresh schema definition and validator are built for each value,
    # mirroring two independent checks.
    for bad_value in ("one hundred", print):
        definition = {"fields": [{"name": "number_field", "type": "numeric"}]}
        validator = Schema(definition)
        assert not validator.validate({"number_field": bad_value})
def test_writer_backout():
    """
    Write DATA_SET through a StreamWriter with a 1-second idle timeout, wait,
    then read the dataset back from disk and expect eight records.
    """
    # Start from a clean folder so earlier runs cannot affect the count.
    target_folder = Path(TEST_FOLDER)
    if target_folder.exists():  # pragma: no cover
        shutil.rmtree(TEST_FOLDER)

    writer = StreamWriter(
        dataset=TEST_FOLDER,
        inner_writer=DiskWriter,
        schema=Schema(SCHEMA),
        idle_timeout_seconds=1,
    )
    for entry in DATA_SET:
        writer.append(entry)

    # Sleep well past the 1-second idle timeout before reading back.
    # NOTE(review): presumably this gives the writer time to commit idle
    # output to disk - confirm against StreamWriter.
    time.sleep(4)

    reader = Reader(dataset=TEST_FOLDER, inner_reader=DiskReader)
    records_read = list(reader)

    assert len(records_read) == 8
Пример #17
0
def test_validator_loaders():
    """
    Ensure a schema loads from a dictionary, a JSON string and a JSON file.

    The JSON file is a scratch file named "temp" in the working directory; it
    is removed again when the test finishes, whether it passes or fails.
    """
    import os  # local import: only needed to clean up the scratch file

    TEST_SCHEMA_DICT = {"fields": [{"name": "string_field", "type": "string"}]}
    TEST_SCHEMA_STRING = orjson.dumps(TEST_SCHEMA_DICT).decode()
    TEST_SCHEMA_FILE = "temp"

    try:
        with open(TEST_SCHEMA_FILE, "w", encoding="utf-8") as file:
            file.write(TEST_SCHEMA_STRING)

        # Each source is tried in turn; the label identifies which one failed.
        sources = (
            (TEST_SCHEMA_DICT, "load schema from dictionary"),
            (TEST_SCHEMA_STRING, "load schema from string"),
            (TEST_SCHEMA_FILE, "load schema from file"),
        )
        for source, label in sources:
            failed = False
            try:
                test = Schema(source)
                test.validate({"string_field": "pass"})
            except Exception:  # pragma: no cover
                failed = True
            assert not failed, label
    finally:
        # Do not leave the scratch file behind in the working directory.
        if os.path.exists(TEST_SCHEMA_FILE):
            os.remove(TEST_SCHEMA_FILE)
Пример #18
0
def jj_validate(cycles=0):
    """
    Build a Schema from the module-level ``schema_definition`` and validate
    the module-level ``data`` against it ``cycles`` times.

    The validation results are discarded.
    """
    validator = Schema(schema_definition)
    for _ in range(cycles):
        validator.validate(data)
Пример #19
0
        writer = BatchWriter(
            inner_writer=NullWriter,
            dataset="_tests/{datefolders}",
            format=compress,
            schema=schema,
            metadata={"test_data": True},
        )
        start = time.perf_counter_ns()
        for record in reader:
            writer.append(record)
        writer.finalize()
        res.append((time.perf_counter_ns() - start) / 1e9)
    return statistics.mean(res)


# Build the validator once, at module level, from the schema definition.
schema = Schema(schema_definition)
# Read the whole sample file into a list up front.
lines = list(read_jsonl("tests/data/formats/jsonl/tweets.jsonl"))

# Report how many records were loaded.
print(len(lines))

# Collects one timing entry per benchmark configuration.
results = []
# NOTE(review): the second argument is None for the "validation": False run -
# presumably it is the schema handed to the writer; confirm against
# execute_test's definition.
result = {
    "format": "jsonl",
    "validation": False,
    "time": execute_test("jsonl", None, lines),
}
results.append(result)

result = {
    "format": "jsonl",
    "validation": True,
Пример #20
0
def test_validator_number_ranges():

    OVER_TEST_DATA = {"number": 1000}
    UNDER_TEST_DATA = {"number": 100}
    IN_TEST_DATA = {"number": 500}
    TEST_SCHEMA = {
        "fields": [{"name": "number", "type": "numeric", "min": 250, "max": 750}]
    }

    test = Schema(TEST_SCHEMA)
    assert not test.validate(OVER_TEST_DATA)
    assert not test.validate(UNDER_TEST_DATA)
    assert test.validate(IN_TEST_DATA)

    TEST_SCHEMA_MIN = {"fields": [{"name": "number", "type": "numeric", "min": 250}]}
    test = Schema(TEST_SCHEMA_MIN)
    assert test.validate(OVER_TEST_DATA), test.last_error
    assert not (test.validate(UNDER_TEST_DATA)), test.last_error

    TEST_SCHEMA_MAX = {"fields": [{"name": "number", "type": "numeric", "max": 750}]}
    test = Schema(TEST_SCHEMA_MAX)
    assert test.validate(UNDER_TEST_DATA), test.last_error
    assert not (test.validate(OVER_TEST_DATA)), test.last_error