def test_select_doesnot_fail_if_some_names_missing():
    """Selecting ``x`` works even when another column has an empty name."""
    expected = tibble(x=range(1, 11))

    named = tibble(x=range(1, 11), y=range(1, 11), z=range(1, 11))
    partly_named = colnames(named, ["x", "y", ""])

    from_named = select(named, f.x)
    assert from_named.equals(expected)

    from_partly_named = select(partly_named, f.x)
    assert from_partly_named.equals(expected)
def test_works_on_na_names():
    """Columns can be selected by name when a sibling column is named NA."""
    frame = tibble(x=1, y=2, z=3) >> colnames(c("x", "y", NA))
    picked = select(frame, f.x)
    assert_iterable_equal(picked.x, [1])

    # Move the NA name to the first column and select a later one.
    frame >>= colnames(c(NA, "y", "z"))
    picked = select(frame, f.y)
    assert_iterable_equal(picked.y, [2])
def test_with_no_args_returns_nothing():
    """``select`` without a selection keeps the 32 rows but no columns."""
    result = select(mtcars)
    assert ncol(result) == 0
    assert nrow(result) == 32

    # Same expectation when an empty keyword mapping is splatted in.
    no_kwargs = {}
    result = select(mtcars, **no_kwargs)
    assert ncol(result) == 0
    assert nrow(result) == 32
def test_select_rename_with_dup_names():
    """Renaming through ``select`` raises on a frame with duplicated names."""
    dup = tibble(tibble(x=1), x=2, _name_repair="minimal")

    expected_msg = 'Names must be unique. Name "x" found at locations'
    with pytest.raises(ValueError, match=expected_msg):
        dup >> select(y=f.x)

    # Position 3 does not exist in this two-column frame.
    with pytest.raises(KeyError):
        dup >> select(y=3)
def test_excluding_all_vars_returns_nothing():
    """Selections that leave no column yield a 32-row, 0-column frame."""
    no_columns = (32, 0)

    # Negated slice starting at the first column.
    result = select(mtcars, ~f[f.mpg:])
    assert result.shape == no_columns

    # Prefix that matches no column name.
    result = mtcars >> select(starts_with("x"))
    assert result.shape == no_columns

    # Negation of a pattern that matches every column name.
    result = mtcars >> select(~matches("."))
    assert result.shape == no_columns
def test_can_select_with_duplicate_columns():
    """Unambiguous selections work on a frame with two ``x`` columns."""
    frame = tibble(tibble(x=1), x=2, y=1, _name_repair="minimal")

    by_position = select(frame, 0, 2)
    assert by_position.columns.tolist() == ["x", "y"]

    by_position_reversed = select(frame, 2, 0)
    assert by_position_reversed.columns.tolist() == ["y", "x"]

    by_name = select(frame, f.y)
    assert by_name.columns.tolist() == ["y"]
def test_rowwise_preserved_by_major_verbs():
    """Major verbs return a rowwise frame; ``summarise`` returns a grouped one."""

    def check(result, expected_type, expected_vars):
        # Shared pair of assertions applied to every verb's output.
        assert isinstance(result, expected_type)
        assert group_vars(result) == expected_vars

    rf = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rf, f.y)
    check(arranged, TibbleRowwise, ["x"])

    filtered = filter(rf, f.x < 3)
    check(filtered, TibbleRowwise, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    check(mutated, TibbleRowwise, ["x"])

    # Renaming the rowwise variable is reflected in the group vars.
    renamed = rename(rf, X=f.x)
    check(renamed, TibbleRowwise, ["X"])

    selected = select(rf, "x")
    check(selected, TibbleRowwise, ["x"])

    sliced = slice(rf, c(0, 0))
    check(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    check(summarised, TibbleGrouped, ["x"])
def test_0_groups_select():
    """Selecting the group column of a 0-row grouped frame keeps dim/names."""
    grouped = tibble(x=1).loc[[], :] >> group_by(f.x)
    selected = grouped >> select(f.x)

    dim_before = grouped >> dim()
    dim_after = selected >> dim()

    assert dim_before == dim_after
    assert grouped.columns.tolist() == selected.columns.tolist()
def test_zero_row_dfs():
    """Verbs accept a 0-row grouped frame and report consistent shape/groups."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise: one column per group var plus ``n``; grouping is dropped.
    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # select: the grouping column comes along with the selected ``a``.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []
def test_can_be_before_group_by():
    """A group_by/select/summarise pipeline yields id, year and var1."""
    frame = tibble(
        id=c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5),
        year=c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013),
        var1=rnorm(10),
    )

    aggregated = (
        frame
        >> group_by(f.id, f.year)
        >> select(f.id, f.year, f.var1)
        >> summarise(var1=mean(f.var1))
    )

    assert_iterable_equal(names(aggregated), ["id", "year", "var1"])
def test_group_split_keep_false_does_not_tryto_remove_virtual_grouping_cols():
    """Splitting by a virtual group column with ``_keep=False`` works."""
    # test_that("group_split(keep=FALSE) does not try to
    #   remove virtual grouping columns (#4045)", {
    head = iris.head(4).copy()
    grouped = group_by(head, _bootstrap=[0, 1, 0, 1])

    # Row positions expected in each of the two pieces.
    first_rows, second_rows = [0, 2], [1, 3]

    pieces = group_split.list(grouped, _keep=False)
    head = select(head, ~f._bootstrap)

    assert len(pieces) == 2

    expected_first = head.iloc[first_rows, :].reset_index(drop=True)
    assert_frame_equal(pieces[0], expected_first)

    expected_second = head.iloc[second_rows, :].reset_index(drop=True)
    assert_frame_equal(pieces[1], expected_second)
def test_tidyselect_funs():
    """Exercise the tidyselect helpers against a small mixed-case frame."""

    # tidyselect.where
    def isupper(ser):
        return ser.name.isupper()

    frame = tibble(x=1, X=2, y=3, Y=4)

    picked = frame >> select(where(isupper))
    assert picked.columns.tolist() == ["X", "Y"]

    # Predicate registered as a verb and returning a two-element list.
    @register_verb
    def islower(_data, series):
        return [series.name.islower(), True]

    picked = frame >> select(where(islower))
    assert picked.columns.tolist() == ["x", "y"]

    picked = frame >> select(where(lambda x: False))
    assert picked.shape == (1, 0)

    picked = frame >> select(ends_with("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    picked = frame >> select(contains("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    # all_of raises on the missing name "a"; any_of skips it.
    with pytest.raises(KeyError):
        frame >> select(all_of(["x", "a"]))

    picked = frame >> select(any_of(["x", "y"]))
    assert picked.columns.tolist() == ["x", "y"]

    picked = frame >> select(any_of(["x", "a"]))
    assert picked.columns.tolist() == ["x"]

    generated = num_range("a", 3, width=2)
    assert generated == ["a00", "a01", "a02"]

    # Two columns share the name "X"; the result lists it once.
    duplicated = tibble(tibble(X=1), X=2, _name_repair="minimal")
    picked = duplicated >> select(contains("X"))
    assert picked.columns.tolist() == ["X"]
def avg_weights_and_filter(owfiles):
    """Stack the weight files, average per bin, filter by score and save.

    Args:
        owfiles: Iterable of paths to tab-separated files with a header row.
            The combined table is expected to provide the columns
            ``chrom1``, ``start1``, ``end1``, ``name`` and ``weight``.

    Returns:
        A tuple ``(path, ncols)``: the path of the written file and the
        number of columns of the written table.
    """
    _log("- Averaging bin weights")
    # ``outfile`` and ``cutoff`` are taken from the enclosing scope.
    target = outfile.parent / "_avg_weights_filtered.bed"

    # Row-bind every input file onto the running table (starts as None).
    combined = None
    for owfile in owfiles:
        table = pandas.read_csv(owfile, sep="\t", header=0)
        combined = combined >> bind_rows(table)

    # One summary row per (chrom1, start1, end1) bin.
    averaged = combined >> group_by(f.chrom1, f.start1, f.end1) >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )

    kept = averaged >> filter_(f.score >= cutoff) >> ungroup()

    # Drop the original grouping columns; the renamed copies remain.
    result = kept >> select(
        ~f.chrom1,
        ~f.start1,
        ~f.end1,
    )

    result.to_csv(target, sep="\t", index=False, header=False)
    return target, len(result.columns)
def test_arguments_to_select_dont_match_vars_select_arguments():
    """Keywords ``var``/``exclude``/``include`` are plain renames in select."""
    frame = tibble(a=1)

    # Ungrouped input.
    renamed = select(frame, var=f.a)
    assert renamed.equals(tibble(var=1))

    # Grouped input: the grouping follows the rename.
    renamed = select(group_by(frame, f.a), var=f.a)
    expected = group_by(tibble(var=1), f.var)
    assert renamed.equals(expected)
    assert group_vars(renamed) == group_vars(expected)

    renamed = select(frame, exclude=f.a)
    assert renamed.equals(tibble(exclude=1))

    renamed = select(frame, include=f.a)
    assert renamed.equals(tibble(include=1))

    renamed = select(group_by(frame, f.a), exclude=f.a)
    expected = group_by(tibble(exclude=1), f.exclude)
    assert renamed.equals(expected)
    assert group_vars(renamed) == group_vars(expected)

    renamed = select(group_by(frame, f.a), include=f.a)
    expected = group_by(tibble(include=1), f.include)
    assert renamed.equals(expected)
    assert group_vars(renamed) == group_vars(expected)
def test_keeps_attributes():
    """Entries in ``attrs`` survive a ``select``."""
    frame = tibble(x=1)
    frame.attrs["a"] = "b"

    selected = select(frame, f.x)

    assert selected.attrs["a"] == "b"
def test_0_col_df():
    """``distinct`` on a frame with no columns still reports zero columns."""
    no_columns = tibble(x=range(10)) >> select(~f.x)
    column_count = no_columns >> distinct() >> ncol()
    assert column_count == 0
def test_transmute_can_handle_auto_splicing():
    """Transmuting with an unnamed tibble equals selecting its columns."""
    expected = iris >> select(f.Sepal_Length, f.Sepal_Width)
    spliced = iris >> transmute(tibble(f.Sepal_Length, f.Sepal_Width))
    assert spliced.equals(expected)
def test_slice_works_with_0col_dfs():
    """Slicing one row from a 0-column frame yields exactly one row."""
    row_count = (
        tibble(a=[1, 2, 3])
        >> select(~f.a)
        >> slice(1)
        >> nrow()
    )
    assert row_count == 1
def test_negating_empty_match_returns_everything():
    """Negating a helper that matches nothing keeps the whole frame."""
    frame = tibble(x=[1, 2, 3], y=[3, 2, 1])
    kept = frame >> select(~starts_with("xyz"))
    assert kept.equals(frame)
def test_can_select_data_pronoun():
    """A column taken from the frame itself selects like ``f.cyl``."""
    via_frame = select(mtcars, mtcars.cyl)
    via_symbol = select(mtcars, f.cyl)
    assert via_frame.equals(via_symbol)
def test_non_syntactic_grouping_variable_is_preserved():
    """A group column named ``"a b"`` survives an empty ``select``."""
    # Grouping column given as a plain string.
    by_string = DataFrame({"a b": [1]}) >> group_by("a b") >> select()
    assert by_string.columns.tolist() == ["a b"]

    # Grouping column given through item access on ``f``.
    by_symbol = DataFrame({"a b": [1]}) >> group_by(f["a b"]) >> select()
    assert by_symbol.columns.tolist() == ["a b"]
def test_can_select_with_list_of_strs():
    """String names and a ``c(...)`` of names combine, dropping repeats."""
    selected = select(mtcars, "cyl", "disp", c("cyl", "am", "drat"))

    # https://github.com/pwwang/datar/issues/23
    # exp = mtcars[c("cyl", "disp", "am", "drat")]
    wanted = ["cyl", "disp", "am", "drat"]
    expected = mtcars[wanted]

    assert selected.equals(expected)
def test_grouping_variables_preserved_with_a_message(caplog):
    """Selecting a non-group column re-adds the group column and logs it."""
    grouped = tibble(g=[1, 2, 3], x=[3, 2, 1]) >> group_by(f.g)

    selected = select(grouped, f.x)

    assert "Adding missing grouping variables" in caplog.text
    assert selected.columns.tolist() == ["g", "x"]
def test_preserves_grouping():
    """Renaming the grouping column through ``select`` keeps the grouping.

    The frame is grouped by ``g``; selecting it as ``h`` must leave the
    result grouped by the new name ``h``.
    """
    gf = group_by(tibble(g=[1, 2, 3], x=[3, 2, 1]), f.g)
    out = select(gf, h=f.g)
    # Explicit equality check: the form ``assert value, ["h"]`` would only
    # test the truthiness of ``value`` and treat the list as the message.
    assert group_vars(out) == ["h"]
def test_treats_null_inputs_as_empty():
    """``None`` arguments to ``select`` do not affect the selection."""
    with_nones = select(mtcars, None, f.cyl, None)
    without_nones = select(mtcars, f.cyl)
    assert with_nones.equals(without_nones)
def test_can_select_with_strings():
    """Keyword renames accept column names given as plain strings."""
    renames = {"foo": "cyl", "bar": "am"}

    from_strings = select(mtcars, **renames)
    from_symbols = select(mtcars, foo=f.cyl, bar=f.am)

    assert from_strings.equals(from_symbols)
def test_select_add_group_vars():
    """The grouping column is placed before the selected column."""
    selected = mtcars >> group_by(f.vs) >> select(f.mpg)
    assert selected.columns.tolist() == ["vs", "mpg"]