def test_join():
    """Exercise ``DataFrame.join`` on key column "a" for the default, left and outer modes."""
    left = DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )
    # Every join in this test uses the same key column on both sides.
    key_kwargs = {"left_on": "a", "right_on": "a"}

    # No `how` argument: the expected "b" values omit the row for key "z",
    # which does not occur in the right frame.
    default_joined = left.join(right, **key_kwargs).sort("a")
    expected_b = Series("", [1, 3, 2, 2])
    assert default_joined["b"].series_equal(expected_b)

    # Left join: the "z" row is kept (b == 4) and exactly one "c_right"
    # value is null.
    left_joined = left.join(right, how="left", **key_kwargs).sort("a")
    null_mask = left_joined["c_right"].is_null()
    assert null_mask.sum() == 1
    expected_b = Series("", [1, 3, 2, 2, 4])
    assert left_joined["b"].series_equal(expected_b)

    # Outer join: only the per-column null counts are checked.
    outer_joined = left.join(right, how="outer", **key_kwargs).sort("a")
    assert outer_joined["c_right"].null_count() == 1
    assert outer_joined["c"].null_count() == 2
    assert outer_joined["b"].null_count() == 2
def test_join():
    """Exercise ``DataFrame.join``: default/left/outer modes, multi-column keys, eager vs. lazy.

    The first half joins two small frames on the shared key column "a" and
    checks the resulting values / null counts. The second half only checks
    that a join on two key columns runs, and that the lazy API yields a
    result with the same shape as the eager one.
    """
    left = DataFrame(
        {
            "a": ["a", "b", "a", "z"],
            "b": [1, 2, 3, 4],
            "c": [6, 5, 4, 3],
        }
    )
    right = DataFrame(
        {
            "a": ["b", "c", "b", "a"],
            "k": [0, 3, 9, 6],
            "c": [1, 0, 2, 1],
        }
    )
    # Every join on `left`/`right` uses the same key column on both sides.
    key_kwargs = {"left_on": "a", "right_on": "a"}

    # No `how` argument: the expected "b" values omit the row for key "z",
    # which does not occur in the right frame.
    default_joined = left.join(right, **key_kwargs).sort("a")
    expected_b = Series("", [1, 3, 2, 2])
    assert default_joined["b"].series_equal(expected_b)

    # Left join: the "z" row is kept (b == 4) and exactly one "c_right"
    # value is null.
    left_joined = left.join(right, how="left", **key_kwargs).sort("a")
    null_mask = left_joined["c_right"].is_null()
    assert null_mask.sum() == 1
    expected_b = Series("", [1, 3, 2, 2, 4])
    assert left_joined["b"].series_equal(expected_b)

    # Outer join: only the per-column null counts are checked.
    outer_joined = left.join(right, how="outer", **key_kwargs).sort("a")
    assert outer_joined["c_right"].null_count() == 1
    assert outer_joined["c"].null_count() == 2
    assert outer_joined["b"].null_count() == 2

    frame_a = DataFrame(
        {
            "a": [1, 2, 1, 1],
            "b": ["a", "b", "c", "c"],
        }
    )
    frame_b = DataFrame(
        {
            "foo": [1, 1, 1],
            "bar": ["a", "c", "c"],
            "ham": ["let", "var", "const"],
        }
    )

    # Smoke check only: a join on multiple key columns must run without
    # raising; its result is intentionally discarded.
    frame_a.join(frame_b, left_on=["a", "b"], right_on=["foo", "bar"])

    # The same single-key join through the eager and the lazy API must
    # produce frames of identical shape.
    eager_result = frame_a.join(frame_b, left_on="a", right_on="foo")
    lazy_query = frame_a.lazy().join(frame_b.lazy(), left_on="a", right_on="foo")
    lazy_result = lazy_query.collect()
    assert lazy_result.shape == eager_result.shape