示例#1
0
def test_column_regex():
    """Check regex column matching on a frame with a flat column index.

    A standalone ``Column`` and a ``DataFrameSchema`` keyed by the same
    ``"foo_*"`` pattern are both validated against a frame holding
    ``foo_*`` and ``bar_*`` columns. The frame's columns are then replaced
    by a two-level ``MultiIndex`` and both schemas are expected to raise
    ``IndexError``.
    """
    standalone_schema = Column(
        Int, Check(lambda s: s >= 0), name="foo_*", regex=True
    )
    frame_schema = DataFrameSchema(
        {"foo_*": Column(Int, Check(lambda s: s >= 0), regex=True)}
    )
    # Validation order matches the original: column schema first.
    schemas = (standalone_schema, frame_schema)

    column_names = ["foo_1", "foo_2", "foo_3", "bar_1", "bar_2", "bar_3"]
    # The foo_* and bar_* groups share the same three value ranges.
    column_values = [range(10), range(10, 20), range(20, 30)] * 2
    frame = pd.DataFrame(dict(zip(column_names, column_values)))

    for schema in schemas:
        assert isinstance(schema.validate(frame), pd.DataFrame)

    # A string pattern cannot be matched against multi-level columns, so
    # both schemas are expected to raise.
    frame.columns = pd.MultiIndex.from_tuples(
        [
            ("foo_1", "biz_1"),
            ("foo_2", "baz_1"),
            ("foo_3", "baz_2"),
            ("bar_1", "biz_2"),
            ("bar_2", "biz_3"),
            ("bar_3", "biz_3"),
        ]
    )
    for schema in schemas:
        with pytest.raises(IndexError):
            schema.validate(frame)
示例#2
0
def test_column_regex_multiindex():
    """Check tuple-of-regex column matching on a two-level column index.

    A standalone ``Column`` and a ``DataFrameSchema`` using the pattern
    tuple ``("foo_*", "baz_*")`` are validated against a frame with
    two-level columns. The columns are then swapped for a flat index and
    for a three-level ``MultiIndex``; in both cases each schema is expected
    to raise ``IndexError``.
    """
    standalone_schema = Column(
        Int,
        Check(lambda s: s >= 0),
        name=("foo_*", "baz_*"),
        regex=True,
    )
    frame_schema = DataFrameSchema(
        {("foo_*", "baz_*"): Column(Int, Check(lambda s: s >= 0), regex=True)}
    )
    # Validation order matches the original: column schema first.
    schemas = (standalone_schema, frame_schema)

    column_keys = [
        ("foo_1", "biz_1"),
        ("foo_2", "baz_1"),
        ("foo_3", "baz_2"),
        ("bar_1", "biz_2"),
        ("bar_2", "biz_3"),
        ("bar_3", "biz_3"),
    ]
    # The foo_* and bar_* groups share the same three value ranges.
    column_values = [range(10), range(10, 20), range(20, 30)] * 2
    frame = pd.DataFrame(dict(zip(column_keys, column_values)))

    for schema in schemas:
        assert isinstance(schema.validate(frame), pd.DataFrame)

    # A two-element pattern tuple should be rejected when the frame's
    # columns are a flat index or a MultiIndex with three levels.
    flat_columns = [f"foo_{i}" for i in range(6)]
    three_level_columns = pd.MultiIndex.from_tuples(
        [(f"foo_{i}", f"bar_{i}", f"baz_{i}") for i in range(6)]
    )
    for mismatched_columns in (flat_columns, three_level_columns):
        frame.columns = mismatched_columns
        for schema in schemas:
            with pytest.raises(IndexError):
                schema.validate(frame)
def test_coerce_nullable_object_column():
    """Test that Object dtype coercion preserves the type of each value.

    A nullable, coercing ``Object`` column is validated against a frame
    whose single column mixes an int, a float, a list, a dict, ``np.nan``
    and ``None``. The two trailing null entries must still be null after
    validation, and each of the four leading values must keep its type.
    """
    df_objects_with_na = pd.DataFrame(
        {"col": [1, 2.0, [1, 2, 3], {"a": 1}, np.nan, None]}
    )
    # Capture the expected types before validating so the comparison
    # remains meaningful even if validation were to alter the input frame.
    expected_types = [
        type(df_objects_with_na["col"].iloc[i]) for i in range(4)
    ]

    column_schema = Column(Object, name="col", coerce=True, nullable=True)

    validated_df = column_schema.validate(df_objects_with_na)
    assert isinstance(validated_df, pd.DataFrame)
    assert pd.isna(validated_df["col"].iloc[-1])
    assert pd.isna(validated_df["col"].iloc[-2])
    # The original loop evaluated isinstance() without asserting on it, so
    # this check could never fail; assert the result explicitly.
    for position, expected_type in enumerate(expected_types):
        assert isinstance(validated_df["col"].iloc[position], expected_type)