示例#1
0
def test_missing_column_validation():
    """A column declared on ``PandasColumn`` but absent from the frame must be rejected.

    The frame only has ``foo`` while the constraint list names ``qux``; the raised
    ``ConstraintViolationException`` is expected to match the "Required column" message.
    """
    qux_column = PandasColumn(name="qux", constraints=[ColumnDTypeInSetConstraint({"object"})])
    frame_without_qux = DataFrame({"foo": ["bar", "baz"]})
    expected_message = "Required column qux not in dataframe with columns"

    with pytest.raises(ConstraintViolationException, match=expected_message):
        validate_constraints(frame_without_qux, pandas_columns=[qux_column])
示例#2
0
def test_missing_column_validation():
    """Check that validating a frame lacking the declared ``qux`` column raises.

    The exception must be a ``ConstraintViolationException`` whose text matches
    the "Required column qux ..." pattern.
    """
    object_dtype_only = ColumnDTypeInSetConstraint({'object'})
    declared_columns = [
        PandasColumn(name='qux', constraints=[object_dtype_only]),
    ]
    frame = DataFrame({'foo': ['bar', 'baz']})
    expected_error = "Required column qux not in dataframe with columns"

    with pytest.raises(ConstraintViolationException, match=expected_error):
        validate_constraints(frame, pandas_columns=declared_columns)
示例#3
0
def test_shape_validation_throw_error():
    """A one-row frame checked against ``RowCountConstraint(2)`` must raise."""
    one_row_frame = DataFrame({"foo": [2], "bar": ["hello"]})
    column_specs = [
        PandasColumn.integer_column("foo", min_value=0),
        PandasColumn.string_column("bar"),
    ]
    shape_rules = [RowCountConstraint(2)]

    with pytest.raises(ConstraintViolationException):
        validate_constraints(
            one_row_frame,
            pandas_columns=column_specs,
            dataframe_constraints=shape_rules,
        )
示例#4
0
def test_shape_validation_without_column_constraints():
    """Exercise dataframe-level constraints with no ``pandas_columns`` supplied.

    A one-row frame returns ``None`` under ``RowCountConstraint(1)`` and raises
    ``ConstraintViolationException`` under ``RowCountConstraint(2)``.
    """
    # Passing case: the row count agrees with the constraint.
    passing_frame = DataFrame({"foo": [2], "bar": ["hello"]})
    outcome = validate_constraints(passing_frame, dataframe_constraints=[RowCountConstraint(1)])
    assert outcome is None

    # Failing case: same data shape, but two rows are demanded.
    failing_frame = DataFrame({"foo": [2], "bar": ["hello"]})
    with pytest.raises(ConstraintViolationException):
        validate_constraints(failing_frame, dataframe_constraints=[RowCountConstraint(2)])
示例#5
0
def test_datetime_column_with_tz_validation_fails_when_incorrect_tz():
    """A timestamp created without a timezone must fail a ``tz="UTC"`` datetime column."""
    frame = DataFrame({"datetime_utc": [Timestamp("2021-03-14T12:34:56")]})
    utc_column = PandasColumn.datetime_column("datetime_utc", tz="UTC")

    with pytest.raises(ConstraintViolationException):
        validate_constraints(frame, pandas_columns=[utc_column])
示例#6
0
def test_shape_validation_throw_error():
    """Expect a violation when a single-row frame is held to a two-row count."""
    frame = DataFrame({'foo': [2], 'bar': ['hello']})
    foo_column = PandasColumn.integer_column('foo', min_value=0)
    bar_column = PandasColumn.string_column('bar')
    two_rows_required = RowCountConstraint(2)

    with pytest.raises(ConstraintViolationException):
        validate_constraints(
            frame,
            pandas_columns=[foo_column, bar_column],
            dataframe_constraints=[two_rows_required],
        )
示例#7
0
def test_validate_constraints_ok():
    """``validate_constraints`` returns ``None`` for the ``foo`` column under
    ``ColumnTypeConstraint('object')``."""
    foo_column = PandasColumn(name='foo',
                              constraints=[ColumnTypeConstraint('object')])
    frame = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_constraints(frame, pandas_columns=[foo_column])
    assert outcome is None
示例#8
0
def test_datetime_column_with_min_max_constraints_ok():
    """Datetime columns with ``tz="UTC"`` and min/max bounds validate to ``None``.

    Three bound styles are covered: ``Timestamp.min``/``Timestamp.max``, bounds given
    as ``Z``-suffixed timestamps, and a lower bound constructed with ``tz="US/Eastern"``.
    """
    frame = DataFrame(
        {
            # No column constraint is declared for this first column.
            "datetime": [Timestamp("2021-03-14T12:34:56")],
            "datetime_utc_min_max_no_tz": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_utc_min_max_same_tz": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_utc_min_max_from_different_tz": [Timestamp("2021-03-14T12:34:56Z")],
        }
    )

    extreme_bounds_column = PandasColumn.datetime_column(
        "datetime_utc_min_max_no_tz",
        tz="UTC",
        min_datetime=Timestamp.min,
        max_datetime=Timestamp.max,
    )
    same_tz_bounds_column = PandasColumn.datetime_column(
        "datetime_utc_min_max_same_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )
    mixed_tz_bounds_column = PandasColumn.datetime_column(
        "datetime_utc_min_max_from_different_tz",
        tz="UTC",
        min_datetime=Timestamp("2021-01-01T00:00:00Z", tz="US/Eastern"),
        max_datetime=Timestamp("2021-12-01T00:00:00Z"),
    )

    outcome = validate_constraints(
        frame,
        pandas_columns=[extreme_bounds_column, same_tz_bounds_column, mixed_tz_bounds_column],
    )
    assert outcome is None
示例#9
0
def test_missing_column_validation_with_optional_column():
    """A column declared with ``is_required=False`` may be absent: the result is ``None``."""
    optional_qux = PandasColumn(
        name="qux",
        constraints=[ColumnDTypeInSetConstraint({"object"})],
        is_required=False,
    )
    frame_without_qux = DataFrame({"foo": ["bar", "baz"]})

    outcome = validate_constraints(frame_without_qux, pandas_columns=[optional_qux])
    assert outcome is None
示例#10
0
def test_missing_column_validation_with_optional_column():
    """Validating a frame that lacks the non-required ``qux`` column returns ``None``."""
    object_dtype_only = ColumnDTypeInSetConstraint({'object'})
    declared_columns = [
        PandasColumn(
            name='qux',
            constraints=[object_dtype_only],
            is_required=False,
        ),
    ]
    frame = DataFrame({'foo': ['bar', 'baz']})

    outcome = validate_constraints(frame, pandas_columns=declared_columns)
    assert outcome is None
示例#11
0
    def _dagster_type_check(_, value):
        """Type-check ``value`` as a pandas DataFrame that satisfies the constraints.

        Returns a failing ``TypeCheck`` when ``value`` is not a ``pd.DataFrame``, or
        when ``validate_constraints`` raises ``ConstraintViolationException`` (the
        exception text becomes the description). Otherwise returns a successful
        ``TypeCheck``; its ``metadata_entries`` come from ``_execute_summary_stats``
        when ``event_metadata_fn`` is truthy and are ``None`` otherwise.

        The first positional argument is ignored.
        """
        if not isinstance(value, pd.DataFrame):
            message = 'Must be a pandas.DataFrame. Got value of type. {type_name}'.format(
                type_name=type(value).__name__)
            return TypeCheck(success=False, description=message)

        try:
            validate_constraints(
                value,
                pandas_columns=columns,
                dataframe_constraints=dataframe_constraints,
            )
        except ConstraintViolationException as violation:
            return TypeCheck(success=False, description=str(violation))

        # Summary statistics are only computed when a metadata function exists.
        if event_metadata_fn:
            summary_entries = _execute_summary_stats(name, value,
                                                     event_metadata_fn)
        else:
            summary_entries = None

        return TypeCheck(success=True, metadata_entries=summary_entries)
示例#12
0
def test_shape_validation_ok():
    """A one-row frame passes its column checks and ``RowCountConstraint(1)``."""
    frame = DataFrame({'foo': [2], 'bar': ['hello']})
    column_specs = [
        PandasColumn.integer_column('foo', min_value=0),
        PandasColumn.string_column('bar'),
    ]

    outcome = validate_constraints(
        frame,
        pandas_columns=column_specs,
        dataframe_constraints=[RowCountConstraint(1)],
    )
    assert outcome is None
示例#13
0
def test_shape_validation_ok():
    """Column and row-count constraints both hold, so validation returns ``None``."""
    single_row = DataFrame({"foo": [2], "bar": ["hello"]})
    foo_column = PandasColumn.integer_column("foo", min_value=0)
    bar_column = PandasColumn.string_column("bar")
    exactly_one_row = RowCountConstraint(1)

    outcome = validate_constraints(
        single_row,
        pandas_columns=[foo_column, bar_column],
        dataframe_constraints=[exactly_one_row],
    )
    assert outcome is None
示例#14
0
def test_datetime_column_with_tz_validation_ok():
    """Datetime columns whose ``tz`` argument matches the timestamps validate to ``None``.

    Covers a column declared without ``tz`` plus UTC, Europe/Dublin, US/Eastern and
    Pacific/Chatham columns.
    """
    moment = "2021-03-14T12:34:56"
    frame = DataFrame(
        {
            "datetime": [Timestamp("2021-03-14T12:34:56")],
            "datetime_utc": [Timestamp("2021-03-14T12:34:56Z")],
            "datetime_dublin": [Timestamp(moment, tz="Europe/Dublin")],
            "datetime_est": [Timestamp(moment, tz="US/Eastern")],
            "datetime_chatham": [Timestamp(moment, tz="Pacific/Chatham")],
            # Present in the frame, but no column constraint is declared for it below.
            "datetime_utc_with_min_max": [Timestamp("2021-03-14T12:34:56Z")],
        }
    )

    # The first column is declared with no tz; the rest pair a column name with its zone.
    column_specs = [PandasColumn.datetime_column("datetime")]
    for column_name, zone in (
        ("datetime_utc", "UTC"),
        ("datetime_dublin", "Europe/Dublin"),
        ("datetime_est", "US/Eastern"),
        ("datetime_chatham", "Pacific/Chatham"),
    ):
        column_specs.append(PandasColumn.datetime_column(column_name, tz=zone))

    outcome = validate_constraints(frame, pandas_columns=column_specs)
    assert outcome is None
示例#15
0
def test_validate_constraints_throw_error(column_constraints, dataframe):
    """Validating ``dataframe`` against ``column_constraints`` must raise.

    Both arguments are supplied by the caller; the only expectation is that
    ``validate_constraints`` raises ``ConstraintViolationException``.
    """
    expected_error = ConstraintViolationException
    with pytest.raises(expected_error):
        validate_constraints(dataframe, pandas_columns=column_constraints)
示例#16
0
def test_validate_constraints_ok():
    """``validate_constraints`` returns ``None`` for the ``foo`` column under a
    ``ColumnDTypeInSetConstraint({"object"})``."""
    object_dtype_only = ColumnDTypeInSetConstraint({"object"})
    foo_column = PandasColumn(name="foo", constraints=[object_dtype_only])
    frame = DataFrame({"foo": ["bar", "baz"]})

    outcome = validate_constraints(frame, pandas_columns=[foo_column])
    assert outcome is None