Exemplos de mutate em Python, exemplos de datar.dplyr.mutate em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_group_by.py Projeto: pwwang/datar

def test_auto_splicing():
    """Grouping by a tibble or an across() result equals grouping by columns.

    Each section builds the grouping once from explicit column references
    and once from a data-frame-like argument, then checks both frames are
    equal.
    """
    # A one-column tibble passed positionally.
    by_column = iris >> group_by(f.Species)
    by_tibble = iris >> group_by(tibble(Species=iris.Species))
    assert by_column.equals(by_tibble)

    # across() over a single column.
    by_column = iris >> group_by(f.Species)
    by_across = iris >> group_by(across(f.Species))
    assert by_column.equals(by_across)

    # across() with a function, compared against mutate-then-group.
    rounded = iris >> mutate(across(starts_with("Sepal"), round))
    expected = rounded >> group_by(f.Sepal_Length, f.Sepal_Width)
    actual = iris >> group_by(across(starts_with("Sepal"), round))
    assert expected.equals(actual)

    # across(character()), across(NULL) not supported

    # across() result followed by a plain column.
    rounded = iris >> mutate(across(starts_with("Sepal"), round))
    expected = rounded >> group_by(f.Sepal_Length, f.Sepal_Width, f.Species)
    actual = iris >> group_by(across(starts_with("Sepal"), round), f.Species)
    assert expected.equals(actual)

    # Plain column followed by an across() result.
    rounded = iris >> mutate(across(starts_with("Sepal"), round))
    expected = rounded >> group_by(f.Species, f.Sepal_Length, f.Sepal_Width)
    actual = iris >> group_by(f.Species, across(starts_with("Sepal"), round))
    assert expected.equals(actual)

Exemplo n.º 2

0

Exibir arquivo

def test_length1_vectors_are_recycled():
    """A scalar fills every row; a length-2 list on 4 rows raises ValueError."""
    frame = tibble(x=range(1, 5))

    recycled = mutate(frame, y=1)
    assert recycled.y.tolist() == [1, 1, 1, 1]

    # Two values cannot be assigned to four rows.
    with pytest.raises(ValueError, match="does not match length"):
        mutate(frame, y=[1, 2])

Exemplo n.º 3

0

Exibir arquivo

def test_row_number_with_groups():
    """row_number() restarts within each group, also inside an expression."""
    grouped = tibble(x=[3, 3, 4, 4]).group_by("x")

    numbered = grouped >> mutate(n=row_number())
    assert_iterable_equal(numbered.n.obj, [1, 2, 1, 2])

    # The same numbering, shifted by one.
    shifted = grouped >> mutate(n=row_number() + 1)
    assert_iterable_equal(shifted.n.obj, [2, 3, 2, 3])

Exemplo n.º 4

0

Exibir arquivo

def test_preserves_grouping():
    """mutate() keeps the grouping variables of a grouped frame."""
    grouped = group_by(tibble(x=[1, 2], y=2), f.x)

    # Overwriting the grouping column with a constant leaves one group.
    overwritten = mutate(grouped, x=1)
    assert group_vars(overwritten) == ["x"]
    assert nrow(group_data(overwritten)) == 1

    # Adding an unrelated column leaves the group data unchanged.
    extended = mutate(grouped, z=1)
    assert group_data(extended).equals(group_data(grouped))

Exemplo n.º 5

0

Exibir arquivo

def test_preserves_names():
    """A tibble assigned to a column keeps its own column names."""
    frame = tibble(a=range(1, 4))
    scalar_cols = dict(zip(letters[:3], [0, 1, 2]))
    list_cols = dict(zip(letters[:3], [[0], [1], [2]]))

    # note it's treated as data frame
    from_scalars = frame >> mutate(b=tibble(**scalar_cols))
    from_lists = frame >> mutate(b=tibble(**list_cols))

    assert_iterable_equal(from_scalars["b"].columns, list("abc"))
    assert_iterable_equal(from_lists["b"].columns, list("abc"))

Exemplo n.º 6

0

Exibir arquivo

def test_keep_none_only_keeps_grouping_variables():
    """With _keep="none" only new columns (plus grouping columns) remain."""
    plain = tibble(x=1, y=2)
    grouped = group_by(plain, f.x)

    # Ungrouped: only the new column remains.
    result = mutate(plain, z=1, _keep="none")
    assert result.columns.tolist() == ["z"]

    # Grouped: the grouping column is kept as well.
    result = mutate(grouped, z=1, _keep="none")
    assert result.columns.tolist() == ["x", "z"]

Exemplo n.º 7

0

Exibir arquivo

def test_unnamed_data_frames_are_automatically_unspliced():
    """Positional tibbles passed to mutate() contribute their columns."""
    spliced = tibble(a=1) >> mutate(tibble(b=2))
    assert_tibble_equal(spliced, tibble(a=1, b=2))

    # A later tibble overrides a column set by an earlier one.
    overridden = tibble(a=1) >> mutate(tibble(b=2), tibble(b=3))
    assert_tibble_equal(overridden, tibble(a=1, b=3))

    # Spliced columns can be referenced by later keyword expressions.
    referenced = tibble(a=1) >> mutate(tibble(b=2), c=f.b)
    assert_tibble_equal(referenced, tibble(a=1, b=2, c=2))

Exemplo n.º 8

0

Exibir arquivo

def test_return_one_row():
    """across() over an empty selection inside mutate().

    Unnamed, the result equals the input frame; assigned to ``y``, the
    column is entirely NA.
    """
    # not actually one row, but returns a corresponding series
    frame = tibble(x=range(1, 43))

    unnamed = frame >> mutate(across(c(), as_factor))
    assert unnamed.equals(frame)

    named = frame >> mutate(y=across(c(), as_factor))
    # empty column in pandas will be NAs
    assert named.y.isna().all()

Exemplo n.º 9

0

Exibir arquivo

def test_deals_with_0_groups():
    """mutate() on an empty grouped frame keeps shape and grouping."""
    empty = tibble(x=[]) >> group_by(f.x)

    result = mutate(empty, y=f.x + 1)
    expected = tibble(x=[], y=[]) >> group_by(f.x)
    assert_iterable_equal(result, expected)
    assert group_vars(result) == group_vars(expected)

    # Same frame, with max(f.x) as the new column.
    result = mutate(empty, y=max(f.x))
    assert result.shape == (0, 2)
    assert group_vars(result) == ["x"]

Exemplo n.º 10

0

Exibir arquivo

def test_cache_key():
    """Two across() calls with different functions give different results."""
    grouped = tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)

    # Same column selection, different function: mean vs. max.
    actual = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    expected = grouped >> mutate(x=mean(f.a), y=max(f.a))
    assert_frame_equal(actual, expected)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test_group_by.py Projeto: pwwang/datar

def test_group_by_keeps_the_right_order_of_subdfs():
    """A grouped mutate() returns values in the original row order."""
    keys = tibble(
        g1=["a", "b", "c", "a", "b", "c", "a", "b", "c"],
        g2=["a", "b", "c", "a", "b", "c", "a", "b", "b"],
    )
    frame = keys >> mutate(x=range(9))

    # Re-assigning x to itself within groups must not reorder the rows.
    regrouped = frame >> group_by(f.g1, f.g2) >> mutate(x=f.x)
    assert_iterable_equal(regrouped.x.obj, range(9))

Exemplo n.º 12

0

Exibir arquivo

def test_works_on_empty_data_frames():
    """mutate() accepts an empty tibble, with or without new columns."""
    empty = tibble()

    # No arguments: still zero rows and zero length.
    untouched = empty >> mutate()
    assert nrow(untouched) == 0
    assert len(untouched) == 0

    # An empty column is added without creating rows.
    with_column = empty >> mutate(x=[])
    assert with_column.columns.tolist() == ["x"]
    assert nrow(with_column) == 0
    assert ncol(with_column) == 1

Exemplo n.º 13

0

Exibir arquivo

def test_handles_data_frame_columns():
    """A tibble can be a column value on plain, grouped and rowwise frames."""
    frame = tibble(a=c(1, 2, 3), b=c(2, 3, 4), base_col=c(3, 4, 5))

    # Plain frame.
    plain_out = mutate(frame, new_col=tibble(x=[1, 2, 3]))
    assert_tibble_equal(plain_out["new_col"], tibble(x=[1, 2, 3]))

    # Grouped frame.
    grouped_out = mutate(group_by(frame, f.a), new_col=tibble(x=f.a))
    assert_iterable_equal(grouped_out["new_col"].x.obj, [1, 2, 3])

    # Rowwise frame: the nested frame is compared against a rowwise tibble.
    rowwise_frame = rowwise(frame, f.a)
    rowwise_out = mutate(rowwise_frame, new_col=tibble(x=f.a))
    assert_tibble_equal(
        rowwise_out["new_col"], tibble(x=[1, 2, 3]) >> rowwise()
    )

Exemplo n.º 14

0

Exibir arquivo

def test_lead_lag_inside_mutates_handles_expressions_as_value_for_default():
    """lead()/lag() inside mutate() accept an expression or list as default."""
    frame = tibble(x=[1, 2, 3])

    # Default given as an expression on the frame's own column.
    shifted = mutate(
        frame,
        leadn=lead(f.x, default=f.x[0]),
        lagn=lag(f.x, default=f.x[0]),
    )
    assert_iterable_equal(shifted.leadn, lead(frame.x, default=frame.x[0]))
    assert_iterable_equal(shifted.lagn, lag(frame.x, default=frame.x[0]))

    # Default given as a one-element list.
    shifted = mutate(
        frame,
        leadn=lead(f.x, default=[1]),
        lagn=lag(f.x, default=[1]),
    )
    assert_iterable_equal(shifted.leadn, lead(frame.x, default=[1]))
    assert_iterable_equal(shifted.lagn, lag(frame.x, default=[1]))

Exemplo n.º 15

0

Exibir arquivo

def test_mutate_cols_inside_func():
    """Column expressions passed through a registered function are evaluated."""
    frame = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        # Thin wrapper so the column expressions go through a function call.
        return tibble(**kwargs)

    actual = frame >> mutate(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )
    # df.y does not work on grouped data
    expected = frame >> mutate(
        across(everything(), lambda col: col / frame.y)
    )
    assert actual.equals(expected)

Exemplo n.º 16

0

Exibir arquivo

def test_works_sequentially():
    """across() inside mutate() sees columns created by earlier arguments."""
    frame = tibble(a=1)

    # y is computed after x exists, so it counts one more numeric column.
    result = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(where(is_numeric))),
    )
    expected = tibble(a=1, x=1, y=2)
    assert result.equals(expected)

    # After a becomes a string there are no numeric columns left.
    result = frame >> mutate(a="x", y=ncol(across(where(is_numeric))))
    expected = tibble(a="x", y=0)
    assert result.equals(expected)

Exemplo n.º 17

0

Exibir arquivo

def test_empty_mutate_returns_input():
    """mutate() with no expressions returns a frame equal to its input.

    Checked for both a plain tibble and a grouped tibble. For the grouped
    case the result must also be a ``TibbleGrouped`` with the same grouping
    variables.
    """
    df = tibble(x=1)
    gf = group_by(df, f.x)

    out = mutate(df)
    assert out.equals(df)

    out = mutate(gf)
    assert_tibble_equal(out, gf)
    # Check the result, not the input: the original asserted on `gf`, which
    # says nothing about what mutate() returned.
    assert isinstance(out, TibbleGrouped)
    assert group_vars(out) == ["x"]

Exemplo n.º 18

0

Exibir arquivo

def test_can_use_before_and_after_to_control_column_position():
    """_before/_after place new columns; existing columns stay in place."""
    frame = tibble(x=1, y=2)

    # Default: the new column is appended at the end.
    appended = mutate(frame, z=1)
    assert appended.columns.tolist() == ["x", "y", "z"]

    # Positional index 1 as _before and index 0 as _after give the same order.
    placed_before = mutate(frame, z=1, _before=1)
    assert placed_before.columns.tolist() == ["x", "z", "y"]
    placed_after = mutate(frame, z=1, _after=0)
    assert placed_after.columns.tolist() == ["x", "z", "y"]

    # Modifying an existing column does not move it.
    frame = tibble(x=1, y=2)
    unmoved = mutate(frame, x=1, _after=f.y)
    assert unmoved.columns.tolist() == ["x", "y"]

Exemplo n.º 19

0

Exibir arquivo

def test_if_any_all_enforce_bool():
    """if_all()/if_any() with identity on non-zero numbers act as True."""
    frame = tibble(x=10, y=10)

    # In filter(): every row is kept.
    kept = frame >> filter(if_all(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    kept = frame >> filter(if_any(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    # In mutate(): the new column equals a literal True column.
    flagged = frame >> mutate(ok=if_all(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))

    flagged = frame >> mutate(ok=if_any(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))

Exemplo n.º 20

0

Exibir arquivo

Arquivo: test_context.py Projeto: pwwang/datar

def test_cur_data_all_sequentially():
    """cur_data()/cur_data_all() see columns added earlier in the same call."""
    plain = tibble(a=1)
    result = plain >> mutate(
        x=cur_data().transform(ncol),
        y=cur_data().transform(ncol),
    )
    expected = tibble(a=1, x=1, y=2)
    assert result.equals(expected)

    # Grouped frame: cur_data_all() gives 2 columns for x, then 3 for y.
    grouped = tibble(a=1, b=2) >> group_by(f.a)
    result = grouped >> mutate(
        x=cur_data_all().transform(ncol),
        y=cur_data_all().transform(ncol),
    )
    expected = tibble(a=1, b=2, x=2, y=3)
    assert result.equals(expected)

Exemplo n.º 21

0

Exibir arquivo

def test_applied_progressively():
    """mutate() arguments are evaluated left to right, each seeing the last."""
    frame = tibble(x=1)

    # A new column can be used by the next expression.
    result = frame >> mutate(y=f['x'] + 1, z=f.y + 1)
    assert_tibble_equal(result, tibble(x=1, y=2, z=3))

    # An existing column can be redefined from a freshly created one.
    result = frame >> mutate(y=f.x + 1, x=f.y + 1)
    assert_tibble_equal(result, tibble(x=3, y=2))

    # An overwritten column is seen with its new value.
    result = frame >> mutate(x=2, y=f.x)
    assert_tibble_equal(result, tibble(x=2, y=2))

    # A plain column copy behaves like a computed copy.
    frame = tibble(x=1, y=2)
    copied = frame >> mutate(x2=f.x, x3=f.x2 + 1)
    computed = frame >> mutate(x2=f.x + 0, x3=f.x2 + 1)
    assert_tibble_equal(copied, computed)

Exemplo n.º 22

0

Exibir arquivo

def test_attrgetter():
    """attrgetter(..., "str") and pd_str() both upper-case a string column."""
    frame = tibble(x=list("abc"))

    # Ungrouped frame.
    upper = frame >> mutate(y=attrgetter(f.x, "str").upper())
    assert_iterable_equal(upper.y, ["A", "B", "C"])

    upper = frame >> mutate(y=pd_str(f.x).upper())
    assert_iterable_equal(upper.y, ["A", "B", "C"])

    # Grouped frame: the values are read through `.obj`.
    grouped = frame >> group_by(g=1)
    upper = grouped >> mutate(y=attrgetter(f.x, "str").upper())
    assert_iterable_equal(upper.y.obj, ["A", "B", "C"])

    upper = grouped >> mutate(y=pd_str(f.x).upper())
    assert_iterable_equal(upper.y.obj, ["A", "B", "C"])

Exemplo n.º 23

0

Exibir arquivo

Arquivo: test_group_by.py Projeto: pwwang/datar

def test_rowwise_preserved_by_major_verbs():
    """Major verbs return a TibbleRowwise; summarise() returns TibbleGrouped."""
    rowwise_frame = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rowwise_frame, f.y)
    assert isinstance(arranged, TibbleRowwise)
    assert group_vars(arranged) == ["x"]

    filtered = filter(rowwise_frame, f.x < 3)
    assert isinstance(filtered, TibbleRowwise)
    assert group_vars(filtered) == ["x"]

    mutated = mutate(rowwise_frame, x=f.x + 1)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Renaming the grouping column renames the grouping variable too.
    renamed = rename(rowwise_frame, X=f.x)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]

    selected = select(rowwise_frame, "x")
    assert isinstance(selected, TibbleRowwise)
    assert group_vars(selected) == ["x"]

    sliced = slice(rowwise_frame, c(0, 0))
    assert isinstance(sliced, TibbleRowwise)
    assert group_vars(sliced) == ["x"]

    # Except for summarise
    summarised = summarise(rowwise_frame, z=mean(f.x, f.y))
    assert isinstance(summarised, TibbleGrouped)
    assert group_vars(summarised) == ["x"]

Exemplo n.º 24

0

Exibir arquivo

Arquivo: test_distinct.py Projeto: pwwang/datar

def test_mutate_internally():
    """distinct() with a named expression equals mutate() then distinct()."""
    frame = tibble(g=c(1, 2), x=c(1, 2))

    one_step = frame >> distinct(aa=f.g * 2)
    two_step = frame >> mutate(aa=f.g * 2) >> distinct(f.aa)

    assert one_step.equals(two_step)

Exemplo n.º 25

0

Exibir arquivo

Arquivo: test_order_by.py Projeto: pwwang/datar

def test_order_by():
    """order_by() works inside mutate() and raises ValueError outside it."""
    frame = tibble(x=f[1:6])

    result = frame >> mutate(y=order_by(f[5:], cumsum(f.x)))
    assert_iterable_equal(result.y, [15, 14, 12, 9, 5])

    # Called directly on plain sequences, outside of a verb.
    with pytest.raises(ValueError):
        order_by(seq(5, 1), cumsum(seq(1, 5)))

Exemplo n.º 26

0

Exibir arquivo

def test_0col_df_in_results_ignored():
    """A zero-column tibble passed to summarise() adds nothing to the result."""
    keys_only = tibble(x=[1, 2])

    # One-column input: only the empty tibble is summarised.
    summarised = keys_only >> group_by(f.x) >> summarise(tibble())
    assert summarised.equals(keys_only)

    # One-column input: empty tibble plus a constant column.
    summarised = keys_only >> group_by(f.x) >> summarise(tibble(), y=65)
    expected = keys_only >> mutate(y=65)
    assert summarised.equals(expected)

    # Two-column input: the non-grouping column is dropped.
    two_cols = tibble(x=[1, 2], y=[3, 4])
    summarised = two_cols >> group_by(f.x) >> summarise(tibble())
    assert summarised.equals(keys_only)

    # Two-column input: empty tibble plus a constant column.
    summarised = two_cols >> group_by(f.x) >> summarise(tibble(), z=98)
    expected = keys_only >> mutate(z=98)
    assert summarised.equals(expected)

Exemplo n.º 27

0

Exibir arquivo

Arquivo: BedConsensus.py Projeto: pwwang/biopipen

def read_bed(bedfile, bedidx):
    """Read a BED file and return the path of the file to use downstream.

    The file is loaded without a header row and its columns are named after
    the leading entries of the BED field list. If it already has a ``score``
    column and ``bedidx`` is not in ``ignore_scores``, the input path is
    returned unchanged. Otherwise ``score`` is set to ``end - start`` and the
    table is written, tab-separated without header or index, to
    ``_<stem>.bed`` next to ``outfile``; that path is returned.

    Args:
        bedfile: Path of the BED file to read.
        bedidx: Index of this file; used to look up ``stems`` and checked
            against ``ignore_scores``.

    Returns:
        ``bedfile`` when the existing scores are used, otherwise the path of
        the newly written file.
    """
    _log("- Reading BED file:", bedfile)
    rewritten = outfile.parent / f"_{stems[bedidx]}.bed"
    table = pandas.read_csv(bedfile, sep="\t", header=None)

    bed_fields = [
        "chrom", "start", "end", "name", "score", "strand",
        "thickStart", "thickEnd", "itemRgb",
        "blockCount", "blockSizes", "blockStarts",
    ]
    # Name only as many fields as the file actually has.
    table.columns = bed_fields[:len(table.columns)]

    # Existing scores are usable: hand back the input untouched.
    if "score" in table.columns and bedidx not in ignore_scores:
        return bedfile

    # Otherwise use the interval length as the score.
    table = table >> mutate(score=f.end - f.start)
    table.to_csv(rewritten, sep="\t", index=False, header=False)
    return rewritten

Exemplo n.º 28

0

Exibir arquivo

Arquivo: test_group_by.py Projeto: pwwang/datar

def test_zero_row_dfs():
    """Verbs on a zero-row grouped frame keep its shape and grouping."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise() leaves no grouping variables.
    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # Selecting `a` alone still yields two columns, grouped by `g`.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []

Exemplo n.º 29

0

Exibir arquivo

def test_nb_fail():
    """mutate() with a combined where()/negated-columns selection keeps rows."""
    from datar.datasets import iris

    # Double columns, excluding the two Petal columns.
    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )
    row_count = rounded >> nrow()
    assert row_count == 150

Exemplo n.º 30

0

Exibir arquivo

def test_if_any_all_in_mutate():
    """if_any()/if_all() give row-wise booleans when used in mutate()."""
    frame = tibble(x=c(1, 5, 10, 10), y=c(0, 0, 0, 10), z=c(10, 5, 1, 10))

    # The `all` expression slices up to the `any` column created just before.
    flagged = frame >> mutate(
        any=if_any(f[f.x:], lambda x: x > 8),
        all=if_all(f[f.x:f.any], lambda x: x > 8),
    )

    assert_iterable_equal(flagged["any"], [True, False, True, True])
    assert_iterable_equal(flagged["all"], [False, False, False, True])