def test_merge_asof():
    """Smoke-test ``pd.merge_asof`` for column-keyed and index-keyed joins.

    Every supported call is expected to emit a ``UserWarning`` and to return
    a ``pd.DataFrame``. Passing plain dicts instead of frames is expected to
    raise ``ValueError``.
    """
    left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
    right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

    # Join on the shared "a" column: defaults first, then each option variant.
    column_join_variants = (
        {},
        {"allow_exact_matches": False},
        {"direction": "forward"},
        {"direction": "nearest"},
    )
    for extra_kwargs in column_join_variants:
        with pytest.warns(UserWarning):
            df = pd.merge_asof(left, right, on="a", **extra_kwargs)
            assert isinstance(df, pd.DataFrame)

    # Same data, but the join key now lives in the index of both frames.
    left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
    right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])

    with pytest.warns(UserWarning):
        df = pd.merge_asof(left, right, left_index=True, right_index=True)
        assert isinstance(df, pd.DataFrame)

    # Raw dicts are not acceptable operands.
    dict_left = {"left_val": ["a", "b", "c"]}
    dict_right = {"right_val": [1, 2, 3, 6, 7]}
    with pytest.raises(ValueError):
        pd.merge_asof(dict_left, dict_right, left_index=True, right_index=True)
def test_merge_asof_suffixes():
    """Check that Modin treats the ``suffixes`` argument the same way pandas does.

    Both frames share the column name ``"a"`` and are joined on their index,
    so the suffixes decide the names of the overlapping output columns.
    """
    left_data = {"a": [1, 5, 10]}
    right_data = {"a": [2, 3, 6]}
    pandas_left = pandas.DataFrame(left_data)
    pandas_right = pandas.DataFrame(right_data)
    modin_left = pd.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)

    index_join = {"left_index": True, "right_index": True}

    # Suffix pairs for which the merged frames are compared between libraries.
    accepted_suffixes = [("a", "b"), (False, "c"), ("d", False)]
    for suffix_pair in accepted_suffixes:
        expected = pandas.merge_asof(
            pandas_left, pandas_right, suffixes=suffix_pair, **index_join
        )
        with warns_that_defaulting_to_pandas():
            actual = pd.merge_asof(
                modin_left, modin_right, suffixes=suffix_pair, **index_join
            )
        df_equals(expected, actual)

    # With both suffixes disabled, a ValueError is expected from both libraries.
    no_suffixes = (False, False)
    with pytest.raises(ValueError):
        pandas.merge_asof(
            pandas_left, pandas_right, suffixes=no_suffixes, **index_join
        )

    with pytest.raises(ValueError):
        with warns_that_defaulting_to_pandas():
            pd.merge_asof(
                modin_left, modin_right, suffixes=no_suffixes, **index_join
            )
def test_merge_asof_on_variations():
    """Compare Modin with pandas for every way of choosing the join key.

    Covers ``on=``, ``left_on=``/``right_on=``, and the mixed and pure
    ``left_index=``/``right_index=`` forms.
    """
    left_data = {"a": [1, 5, 10], "left_val": ["a", "b", "c"]}
    left_labels = [6, 8, 12]
    right_data = {"a": [1, 2, 3, 6, 7], "right_val": ["d", "e", "f", "g", "h"]}
    right_labels = [6, 7, 8, 9, 15]

    pandas_left = pandas.DataFrame(left_data, index=left_labels)
    pandas_right = pandas.DataFrame(right_data, index=right_labels)
    modin_left = pd.DataFrame(left_data, index=left_labels)
    modin_right = pd.DataFrame(right_data, index=right_labels)

    key_selections = [
        {"on": "a"},
        {"left_on": "a", "right_on": "a"},
        {"left_on": "a", "right_index": True},
        {"left_index": True, "right_on": "a"},
        {"left_index": True, "right_index": True},
    ]
    for key_kwargs in key_selections:
        expected = pandas.merge_asof(pandas_left, pandas_right, **key_kwargs)
        with warns_that_defaulting_to_pandas():
            actual = pd.merge_asof(modin_left, modin_right, **key_kwargs)
        df_equals(expected, actual)
def test_merge_asof_merge_options():
    """Compare Modin with pandas for the grouping and matching options.

    Uses a quotes frame and a trades frame keyed on ``"time"`` and checks
    ``left_by``/``right_by``, ``by``, ``tolerance``, ``direction`` and
    ``allow_exact_matches``.
    """

    def _stamps(*millis):
        # Every timestamp shares the same second and differs only in its
        # millisecond suffix.
        return [pd.Timestamp(f"2016-05-25 13:30:00.{ms}") for ms in millis]

    modin_quotes = pd.DataFrame(
        {
            "time": _stamps(
                "023", "023", "030", "041", "048", "049", "072", "075"
            ),
            "ticker": [
                "GOOG",
                "MSFT",
                "MSFT",
                "MSFT",
                "GOOG",
                "AAPL",
                "GOOG",
                "MSFT",
            ],
            "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
            "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
        }
    )
    modin_trades = pd.DataFrame(
        {
            "time": _stamps("023", "038", "048", "048", "048"),
            "ticker2": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
            "price": [51.95, 51.95, 720.77, 720.92, 98.0],
            "quantity": [75, 155, 100, 100, 100],
        }
    )
    pandas_quotes = to_pandas(modin_quotes)
    pandas_trades = to_pandas(modin_trades)

    def _check(**options):
        # Run the pandas merge first, then the Modin one, then compare them.
        expected = pandas.merge_asof(
            pandas_quotes, pandas_trades, on="time", **options
        )
        actual = pd.merge_asof(modin_quotes, modin_trades, on="time", **options)
        df_equals(expected, actual)

    # Differently named grouping columns on each side. This has to run before
    # the shared "ticker" column is added to the trades frames below.
    _check(left_by="ticker", right_by="ticker2")

    # Give both trades frames a "ticker" column so a single ``by`` key works.
    pandas_trades["ticker"] = pandas_trades["ticker2"]
    modin_trades["ticker"] = modin_trades["ticker2"]
    _check(by="ticker")

    # Tolerance
    _check(by="ticker", tolerance=pd.Timedelta("2ms"))

    # Direction
    _check(by="ticker", direction="forward")

    # Exact matches disallowed, with a wider tolerance
    _check(
        by="ticker",
        tolerance=pd.Timedelta("10ms"),
        allow_exact_matches=False,
    )
def test_merge_asof_bad_arguments():
    """Check that invalid ``merge_asof`` argument combinations are rejected.

    Most combinations are tried against pandas and then Modin. The
    index-versus-column conflicts are tried against Modin only.
    """
    left_data = {"a": [1, 5, 10], "b": [5, 7, 9]}
    right_data = {"a": [2, 3, 6], "b": [6, 5, 20]}
    pandas_left = pandas.DataFrame(left_data)
    pandas_right = pandas.DataFrame(right_data)
    modin_left = pd.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)

    # ``by`` cannot be mixed with left_by/right_by, and ``on`` cannot be mixed
    # with left_on/right_on. Both libraries are expected to raise ValueError.
    conflicting_for_both = (
        {"on": "a", "by": "b", "left_by": "can't do with by"},
        {"by": "b", "on": "a", "right_by": "can't do with by"},
        {"on": "a", "left_on": "can't do with by"},
        {"on": "a", "right_on": "can't do with by"},
    )
    for bad_kwargs in conflicting_for_both:
        with pytest.raises(ValueError):
            pandas.merge_asof(pandas_left, pandas_right, **bad_kwargs)
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **bad_kwargs)

    # left_index cannot be mixed with left_on or on, and likewise for the
    # right side. Only Modin is exercised for these.
    conflicting_for_modin = (
        {"on": "a", "right_index": True},
        {"left_on": "a", "right_on": "a", "right_index": True},
        {"on": "a", "left_index": True},
        {"left_on": "a", "right_on": "a", "left_index": True},
    )
    for bad_kwargs in conflicting_for_modin:
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **bad_kwargs)

    # A key is needed for both sides; giving only one side's key is an error.
    for one_sided in ({"left_on": "a"}, {"right_on": "a"}):
        # Pandas bug: it does not validate these inputs sufficiently, so only
        # a generic Exception is expected from it.
        with pytest.raises(Exception):
            pandas.merge_asof(pandas_left, pandas_right, **one_sided)
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **one_sided)

    # No join key at all.
    with pytest.raises(ValueError):
        pandas.merge_asof(pandas_left, pandas_right)
    with pytest.raises(ValueError):
        pd.merge_asof(modin_left, modin_right)