Пример #1
0
def test_join():
    """Join two small frames on column "a" with the default, left and outer modes."""
    left = DataFrame(
        {"a": ["a", "b", "a", "z"], "b": [1, 2, 3, 4], "c": [6, 5, 4, 3]}
    )
    right = DataFrame(
        {"a": ["b", "c", "b", "a"], "k": [0, 3, 9, 6], "c": [1, 0, 2, 1]}
    )
    # Every join below uses the same key column on both sides.
    on_a = {"left_on": "a", "right_on": "a"}

    # No `how` given: the library default is used.
    default_join = left.join(right, **on_a).sort("a")
    assert default_join["b"].series_equal(Series("", [1, 3, 2, 2]))

    # Left join: one row ends up with a null in the right-hand "c" column.
    left_join = left.join(right, how="left", **on_a).sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    assert left_join["b"].series_equal(Series("", [1, 3, 2, 2, 4]))

    # Outer join: nulls are expected on both sides.
    outer_join = left.join(right, how="outer", **on_a).sort("a")
    assert outer_join["c_right"].null_count() == 1
    assert outer_join["c"].null_count() == 2
    assert outer_join["b"].null_count() == 2
Пример #2
0
def test_downsample():
    """Downsample 20 evenly spaced timestamps by 5 minutes and check the shape."""
    # 20 consecutive values starting at 946684800000 and stepping by 60000
    # (presumably epoch milliseconds, one minute apart -- confirm against Date64).
    first_stamp = 946684800000
    step = 60000
    raw_stamps = [first_stamp + step * i for i in range(20)]

    stamps = Series("datetime", raw_stamps).cast(Date64)
    frame = DataFrame({"a": stamps, "b": stamps.clone()})

    firsts = frame.downsample("a", rule="minute", n=5).first()
    assert firsts.shape == (4, 2)
Пример #3
0
def test_rechunk():
    """After an append the Series has two chunks; rechunk brings it back to one."""
    series = Series("a", [1, 2, 3])
    series.append(Series("b", [4, 5, 6]))
    assert series.n_chunks() == 2

    # The non-in-place variant returns a Series with a single chunk.
    merged_copy = series.rechunk(in_place=False)
    assert merged_copy.n_chunks() == 1

    # The in-place variant changes the Series itself.
    series.rechunk(in_place=True)
    assert series.n_chunks() == 1
Пример #4
0
def test_ufunc():
    """NumPy ufuncs applied to a Series return a Series and keep the null count."""
    floats = Series("a", [1.0, 2.0, 3.0, 4.0])
    scaled = np.multiply(floats, 4)
    assert isinstance(scaled, Series)
    # NOTE(review): this asserts the truthiness of `Series == list`; confirm
    # that it is a real element-wise check and not vacuous.
    assert scaled == [4, 8, 12, 16]

    # The null slot must still be reported after the ufunc.
    with_null = Series("a", [1.0, None, 3.0], nullable=True)
    exponentiated = np.exp(with_null)
    assert exponentiated.null_count() == 1
Пример #5
0
def test_cast():
    """Each dedicated ``cast_*`` method yields a Series of the matching dtype."""
    ints = Series("a", range(20))

    # (bound cast method, dtype it is expected to produce)
    expectations = [
        (ints.cast_f32, Float32),
        (ints.cast_f64, Float64),
        (ints.cast_i32, Int32),
        (ints.cast_u32, UInt32),
        (ints.cast_date64, Date64),
        (ints.cast_date32, Date32),
    ]
    for do_cast, expected_dtype in expectations:
        assert do_cast().dtype == expected_dtype
Пример #6
0
def test_filter():
    """Indexing a Series with a boolean mask keeps only the matching elements."""
    values = Series("a", range(20))

    # (mask built by comparing against 1, number of elements expected to remain)
    cases = [
        (values > 1, 18),
        (values < 1, 1),
        (values <= 1, 2),
        (values >= 1, 19),
        (values == 1, 1),
        (values != 1, 19),
    ]
    for mask, expected_len in cases:
        assert values[mask].len() == expected_len
Пример #7
0
def test_replace():
    """Replace column "a" with a boolean Series named "c" and compare the frame."""
    frame = DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
    replacement = Series("c", [True, False, True])

    # The return value is ignored; the frame itself is checked afterwards.
    frame.replace("a", replacement)

    expected = DataFrame({"c": [True, False, True], "b": [1, 2, 3]})
    assert frame.frame_equal(expected)
Пример #8
0
def test_apply():
    """``Series.apply`` maps a Python callable over the values; nulls stay null."""
    # NOTE(review): the list comparisons below assert the truthiness of
    # `Series == list`; confirm that is not vacuous.
    numbers = Series("a", [1, 2, None], nullable=True)
    squared = numbers.apply(lambda x: x**2)
    assert squared == [1, 4, None]

    words = Series("a", ["foo", "bar", None], nullable=True)
    suffixed = words.apply(lambda x: x + "py")
    assert suffixed == ["foopy", "barpy", None]

    # str -> int works when the output dtype is given explicitly.
    lengths = words.apply(lambda x: len(x), dtype_out=Int32)
    assert lengths == [3, 3, None]

    # Without `dtype_out` the same mapping is expected to raise TypeError.
    with pytest.raises(TypeError):
        words.apply(lambda x: len(x))
Пример #9
0
def test_to_pandas():
    """Smoke test: Arrow and pandas conversion run for plain and shifted frames."""
    frame = get_complete_df()
    frame.to_arrow()
    frame.to_pandas()

    # A shifted frame is converted as well.
    shifted = frame.shift(2)
    shifted.to_pandas()

    # Same check for a single boolean column; printing exercises the repr too.
    bool_frame = DataFrame({"col": Series([True, False, True])})
    print(bool_frame)
    bool_frame.shift(2).to_pandas()
Пример #10
0
def test_to_python():
    """``to_list`` returns a plain Python list, with nulls represented as None."""
    ints = Series("a", range(20))
    as_list = ints.to_list()
    assert isinstance(as_list, list)
    assert len(as_list) == 20

    with_null = Series("a", [1, None, 2], nullable=True)
    assert with_null.null_count() == 1
    assert with_null.to_list() == [1, None, 2]
Пример #11
0
def test_custom_groupby():
    """Apply custom Python callables per group, both eagerly and lazily."""
    frame = DataFrame({"A": ["a", "a", "c", "c"], "B": [1, 3, 5, 2]})

    # Callable that reduces each group via `.sum()`.
    summed = frame.groupby("A").select("B").apply(lambda x: x.sum())
    assert summed.shape == (2, 2)

    # Callable that rebuilds each group as a new Series via NumPy.
    rebuilt = frame.groupby("A").select("B").apply(
        lambda x: Series("", np.array(x))
    )
    assert rebuilt.shape == (2, 2)

    frame = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})

    # Lazy API: the custom aggregation is expressed through `apply_groups`.
    sum_per_group = col("a").apply_groups(lambda x: x.sum(), dtype_out=int)
    out = frame.lazy().groupby("b").agg([sum_per_group]).collect()
    assert out.shape == (3, 2)
Пример #12
0
def test_join():
    """Cover default/left/outer joins, a multi-column join and lazy-vs-eager parity."""
    left = DataFrame(
        {"a": ["a", "b", "a", "z"], "b": [1, 2, 3, 4], "c": [6, 5, 4, 3]}
    )
    right = DataFrame(
        {"a": ["b", "c", "b", "a"], "k": [0, 3, 9, 6], "c": [1, 0, 2, 1]}
    )
    # The first three joins use the same key column on both sides.
    on_a = {"left_on": "a", "right_on": "a"}

    # No `how` given: the library default is used.
    default_join = left.join(right, **on_a).sort("a")
    assert default_join["b"].series_equal(Series("", [1, 3, 2, 2]))

    # Left join: one row ends up with a null in the right-hand "c" column.
    left_join = left.join(right, how="left", **on_a).sort("a")
    assert left_join["c_right"].is_null().sum() == 1
    assert left_join["b"].series_equal(Series("", [1, 3, 2, 2, 4]))

    # Outer join: nulls are expected on both sides.
    outer_join = left.join(right, how="outer", **on_a).sort("a")
    assert outer_join["c_right"].null_count() == 1
    assert outer_join["c"].null_count() == 2
    assert outer_join["b"].null_count() == 2

    frame_a = DataFrame({"a": [1, 2, 1, 1], "b": ["a", "b", "c", "c"]})
    frame_b = DataFrame(
        {
            "foo": [1, 1, 1],
            "bar": ["a", "c", "c"],
            "ham": ["let", "var", "const"],
        }
    )

    # Multi-column join: only checked for running without an error.
    frame_a.join(frame_b, left_on=["a", "b"], right_on=["foo", "bar"])

    # The lazy and eager single-column joins must agree on the result shape.
    eager = frame_a.join(frame_b, left_on="a", right_on="foo")
    lazy = (
        frame_a.lazy()
        .join(frame_b.lazy(), left_on="a", right_on="foo")
        .collect()
    )
    assert lazy.shape == eager.shape
Пример #13
0
def test_downsample():
    """Downsample 20 evenly spaced timestamps, aggregate them, and convert to pandas."""
    # 20 consecutive values starting at 946684800000 and stepping by 60000
    # (presumably epoch milliseconds, one minute apart -- confirm against Date64).
    first_stamp = 946684800000
    step = 60000
    raw_stamps = [first_stamp + step * i for i in range(20)]

    stamps = Series("datetime", raw_stamps).cast(Date64)
    frame = DataFrame({"a": stamps, "b": stamps.clone()})

    firsts = frame.downsample("a", rule="minute", n=5).first()
    assert firsts.shape == (4, 2)

    # OLHC: first / min / max / last of column "b" per group.
    aggregations = {"b": ["first", "min", "max", "last"]}
    olhc = frame.downsample("a", rule="minute", n=5).agg(aggregations)
    assert olhc.shape == (4, 5)

    # The pandas conversion must expose "a" with a nanosecond datetime dtype.
    as_pandas = frame.to_pandas()
    assert as_pandas["a"].dtype == "datetime64[ns]"
Пример #14
0
def test_apply():
    """``Series.apply`` maps a Python callable over the values; nulls stay null."""
    # NOTE(review): the list comparisons below assert the truthiness of
    # `Series == list`; confirm that is not vacuous.
    numbers = Series("a", [1, 2, None], nullable=True)
    squared = numbers.apply(lambda x: x**2, sniff_dtype=False)
    assert squared == [1, 4, None]

    words = Series("a", ["foo", "bar", None], nullable=True)
    suffixed = words.apply(lambda x: x + "py")
    assert suffixed == ["foopy", "barpy", None]

    # Output dtype given explicitly.
    explicit_lengths = words.apply(lambda x: len(x), dtype_out=Int32)
    assert explicit_lengths == [3, 3, None]

    # No `dtype_out` here (presumably the output dtype is sniffed -- confirm).
    sniffed_lengths = words.apply(lambda x: len(x))
    assert sniffed_lengths == [3, 3, None]
Пример #15
0
def test_cast():
    """``Series.cast(dtype)`` yields a Series whose dtype is the requested one."""
    ints = Series("a", range(20))

    for target in (Float32, Float64, Int32, UInt32, Date64, Date32):
        assert ints.cast(target).dtype == target
Пример #16
0
def test_equality():
    """Comparison operators between Series give boolean Series that can be summed."""
    lhs = create_series()
    rhs = lhs  # same object on both sides of every comparison

    equal_mask = lhs == rhs
    assert isinstance(equal_mask, Series)
    assert equal_mask.sum() == 2

    # (comparison result, number of True entries expected)
    comparisons = [
        (lhs != rhs, 0),
        (lhs >= rhs, 2),
        (lhs <= rhs, 2),
        (lhs > rhs, 0),
        (lhs < rhs, 0),
    ]
    for mask, true_count in comparisons:
        assert mask.sum() == true_count

    assert lhs.sum() == 3
    assert lhs.series_equal(rhs)

    # Comparing a string Series against a scalar.
    names = Series("name", ["ham", "foo", "bar"])
    matches_ham = names == "ham"
    assert matches_ham.to_list() == [True, False, False]
Пример #17
0
def test_to_pandas():
    """Smoke test: Arrow and pandas conversion run for plain and shifted frames."""
    # Bool, int, float and string columns, each with and without a None.
    columns = {
        "bools": [False, True, False],
        "bools_nulls": [None, True, False],
        "int": [1, 2, 3],
        "int_nulls": [1, None, 3],
        "floats": [1.0, 2.0, 3.0],
        "floats_nulls": [1.0, None, 3.0],
        "strings": ["foo", "bar", "ham"],
        "strings_nulls": ["foo", None, "ham"],
    }
    frame = DataFrame(columns)
    frame.to_arrow()
    frame.to_pandas()

    # A shifted frame is converted as well.
    shifted = frame.shift(2)
    shifted.to_pandas()

    # Same check for a single boolean column; printing exercises the repr too.
    bool_frame = DataFrame({"col": Series([True, False, True])})
    print(bool_frame)
    bool_frame.shift(2).to_pandas()
Пример #18
0
def test_parse_date(dtype, fmt, null_values):
    """Parse three date strings and check the resulting dtype and null count.

    The arguments are presumably supplied by a pytest parametrization that is
    not part of this snippet: ``dtype`` is the target dtype, ``fmt`` the format
    string handed to the parser, and ``null_values`` the expected null count.
    """
    raw_dates = ["25-08-1988", "20-01-1993", "25-09-2020"]
    parsed = Series.parse_date("dates", raw_dates, dtype, fmt)

    # Open question from the original author: why does Date64 result in `nan`?
    assert parsed.dtype == dtype
    null_total = parsed.is_null().sum()
    assert null_total == null_values
Пример #19
0
def test_fold():
    """``pl.sum`` over columns "a" and "b" in a lazy select yields a "sum" column."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]})
    summed = frame.lazy().select(pl.sum(["a", "b"])).collect()

    expected = Series("sum", [2, 4, 6])
    assert summed["sum"].series_equal(expected)
Пример #20
0
def test_hstack():
    """In-place ``hstack`` adds the given Series as a new column of the frame."""
    frame = DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"]})
    extra_column = Series("stacked", [-1, -1, -1])

    frame.hstack([extra_column], in_place=True)

    assert frame.shape == (3, 3)
    assert frame.columns == ["a", "b", "stacked"]
Пример #21
0
def test_rolling():
    """Rolling min/max/sum with a window of 2; the first slot is expected to be None."""
    series = Series("a", [1, 2, 3, 2, 1])
    window = 2

    # NOTE(review): these assert the truthiness of `Series == list`; confirm
    # that is a real element-wise check and not vacuous.
    assert series.rolling_min(window) == [None, 1, 2, 2, 1]
    assert series.rolling_max(window) == [None, 2, 3, 3, 2]
    assert series.rolling_sum(window) == [None, 3, 5, 5, 3]
Пример #22
0
def mkdiff(cumcases: pl.Series) -> pl.Series:
    """
    Return the row-to-row difference of a Series.

    Each element of the result is the input value minus the value produced by
    ``cumcases.shift(1)`` at the same position.
    """
    previous = cumcases.shift(1)
    return cumcases - previous
Пример #23
0
def test_object():
    """A Series built from mixed Python values has the Object dtype and round-trips."""
    mixed = [[12], "foo", 9]
    series = Series("a", mixed)

    assert series.dtype == Object
    assert series.to_list() == mixed
    assert series[1] == "foo"
Пример #24
0
def create_series():
    """Build the two-element Series named "a" holding the values 1 and 2."""
    values = [1, 2]
    return Series("a", values)
Пример #25
0
def test_shift():
    """Shifting moves the values by the given offset and fills the gap with None."""
    series = Series("a", [1, 2, 3])

    # NOTE(review): these assert the truthiness of `Series == list`; confirm
    # that is a real element-wise check and not vacuous.
    forward_one = series.shift(1)
    assert forward_one == [None, 1, 2]

    back_one = series.shift(-1)
    assert back_one == [1, 2, None]

    back_two = series.shift(-2)
    assert back_two == [1, None, None]
Пример #26
0
def test_various():
    """Grab-bag of small Series checks: naming, slicing, append, sort, take, ..."""
    series = create_series()

    # Null count, name and rename.
    assert series.is_null().sum() == 0
    assert series.name == "a"
    series.rename("b")
    assert series.name == "b"

    # Both length spellings agree.
    assert series.len() == 2
    assert len(series) == 2

    # slice(1, 1) yields a one-element Series holding the second value.
    one_element = series.slice(1, 1)
    assert one_element.len() == 1
    assert one_element.series_equal(Series("", [2]))

    # Appending the slice extends the original Series.
    series.append(one_element)
    assert series.series_equal(Series("", [1, 2, 2]))

    # head / tail on a longer Series.
    series = Series("a", range(20))
    first_five = series.head(5)
    last_five = series.tail(5)
    assert first_five.len() == 5
    assert last_five.len() == 5
    # NOTE(review): this asserts the truthiness of `Series != Series`; confirm
    # that is a meaningful check.
    assert series.head(5) != series.tail(5)

    # In-place sort.
    series = Series("a", [2, 1, 4])
    series.sort(in_place=True)
    assert series.series_equal(Series("", [1, 2, 4]))

    # arg_unique gives the index of the first occurrence of each distinct value.
    series = Series("a", [2, 1, 1, 4, 4, 4])
    assert list(series.arg_unique()) == [0, 1, 3]

    # take by position.
    picked = series.take([2, 3])
    assert picked.series_equal(Series("", [1, 4]))

    # Integers count as numeric, booleans do not.
    assert series.is_numeric()
    flags = Series("bool", [True, False])
    assert not flags.is_numeric()
Пример #27
0
def test_sort():
    """``sort`` orders the values as given; ``reverse=True`` flips the order."""
    series = Series("a", [2, 1, 3])

    ascending = series.sort()
    assert ascending.to_list() == [1, 2, 3]

    # NOTE(review): unlike the check above, this asserts the truthiness of
    # `Series == list`; confirm that is not vacuous.
    descending = series.sort(reverse=True)
    assert descending == [3, 2, 1]
Пример #28
0
def test_fill_none():
    """The "forward" strategy replaces a trailing None with the previous value."""
    with_gap = Series("a", [1, 2, None], nullable=True)
    filled = with_gap.fill_none("forward")

    # NOTE(review): this asserts the truthiness of `Series == list`; confirm
    # that is a real element-wise check and not vacuous.
    assert filled == [1, 2, 2]
Пример #29
0
def test_get():
    """Integer indexing returns a single value; slicing returns a prefix."""
    series = Series("a", [1, 2, 3])

    first_value = series[0]
    assert first_value == 1

    # NOTE(review): this asserts the truthiness of `Series == list` (assuming
    # the slice is a Series); confirm that is not vacuous.
    first_two = series[:2]
    assert first_two == [1, 2]
Пример #30
0
def test_view():
    """``Series.view()`` returns a NumPy array holding the Series values."""
    floats = Series("a", [1.0, 2.0, 3.0])

    assert isinstance(floats.view(), np.ndarray)

    expected = np.array([1, 2, 3])
    assert np.all(floats.view() == expected)