Exemplo n.º 1
0
def test_merge_asof():
    """Smoke-test ``pd.merge_asof`` on column-keyed and index-keyed frames.

    Every DataFrame call is expected to emit a ``UserWarning`` and to return
    a ``pd.DataFrame``; passing plain dicts instead of frames is expected to
    raise ``ValueError``.
    """

    def _warns_and_returns_frame(lhs, rhs, **options):
        # The type check stays inside the ``warns`` context, as in each
        # individual stanza this helper stands in for.
        with pytest.warns(UserWarning):
            merged = pd.merge_asof(lhs, rhs, **options)
            assert isinstance(merged, pd.DataFrame)

    # Frames joined on the shared column "a".
    left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
    right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

    column_key_options = (
        {},
        {"allow_exact_matches": False},
        {"direction": "forward"},
        {"direction": "nearest"},
    )
    for extra in column_key_options:
        _warns_and_returns_frame(left, right, on="a", **extra)

    # Frames joined on their indexes.
    left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
    right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])
    _warns_and_returns_frame(left, right, left_index=True, right_index=True)

    # Plain dicts are not accepted as operands.
    dict_left = {"left_val": ["a", "b", "c"]}
    dict_right = {"right_val": [1, 2, 3, 6, 7]}
    with pytest.raises(ValueError):
        pd.merge_asof(dict_left, dict_right, left_index=True, right_index=True)
Exemplo n.º 2
0
def test_merge_asof_suffixes():
    """Check that ``pd.merge_asof`` treats ``suffixes`` the way pandas does.

    Both inputs carry a column named "a" and are joined on their indexes.
    Valid suffix pairs must give results equal to pandas; disabling both
    suffixes must raise ``ValueError`` in both libraries.
    """
    left_data = {"a": [1, 5, 10]}
    right_data = {"a": [2, 3, 6]}
    pandas_left = pandas.DataFrame(left_data)
    pandas_right = pandas.DataFrame(right_data)
    modin_left = pd.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)

    # Keyword arguments shared by every call in this test.
    index_join = {"left_index": True, "right_index": True}

    valid_suffix_pairs = (("a", "b"), (False, "c"), ("d", False))
    for suffix_pair in valid_suffix_pairs:
        expected = pandas.merge_asof(
            pandas_left, pandas_right, **index_join, suffixes=suffix_pair
        )
        with warns_that_defaulting_to_pandas():
            actual = pd.merge_asof(
                modin_left, modin_right, **index_join, suffixes=suffix_pair
            )
        df_equals(expected, actual)

    # With both suffixes disabled, each library is expected to reject the call.
    no_suffixes = (False, False)
    with pytest.raises(ValueError):
        pandas.merge_asof(
            pandas_left, pandas_right, **index_join, suffixes=no_suffixes
        )
    with pytest.raises(ValueError), warns_that_defaulting_to_pandas():
        pd.merge_asof(modin_left, modin_right, **index_join, suffixes=no_suffixes)
Exemplo n.º 3
0
def test_merge_asof_on_variations():
    """Check that every way of naming the join key matches pandas.

    Exercises ``on``, ``left_on``/``right_on`` and the ``left_index`` /
    ``right_index`` flags (including mixed column/index joins) and compares
    the ``pd.merge_asof`` result with ``pandas.merge_asof``.
    """
    left_data = {"a": [1, 5, 10], "left_val": ["a", "b", "c"]}
    left_labels = [6, 8, 12]
    right_data = {"a": [1, 2, 3, 6, 7], "right_val": ["d", "e", "f", "g", "h"]}
    right_labels = [6, 7, 8, 9, 15]

    pandas_left = pandas.DataFrame(left_data, index=left_labels)
    pandas_right = pandas.DataFrame(right_data, index=right_labels)
    modin_left = pd.DataFrame(left_data, index=left_labels)
    modin_right = pd.DataFrame(right_data, index=right_labels)

    # One entry per supported way of specifying the join key.
    key_specs = (
        {"on": "a"},
        {"left_on": "a", "right_on": "a"},
        {"left_on": "a", "right_index": True},
        {"left_index": True, "right_on": "a"},
        {"left_index": True, "right_index": True},
    )
    for key_spec in key_specs:
        expected = pandas.merge_asof(pandas_left, pandas_right, **key_spec)
        with warns_that_defaulting_to_pandas():
            actual = pd.merge_asof(modin_left, modin_right, **key_spec)
        df_equals(expected, actual)
Exemplo n.º 4
0
def test_merge_asof_merge_options():
    """Compare ``pd.merge_asof`` with ``pandas.merge_asof`` across merge options.

    Uses small quotes/trades frames keyed by ``time`` and checks, in order:
    ``left_by``/``right_by``, ``by``, ``tolerance``, ``direction`` and
    ``allow_exact_matches``. The ``ticker`` column is added to both trades
    frames only after the ``left_by``/``right_by`` comparison has run.
    """
    quote_times = [
        pd.Timestamp(stamp)
        for stamp in (
            "2016-05-25 13:30:00.023",
            "2016-05-25 13:30:00.023",
            "2016-05-25 13:30:00.030",
            "2016-05-25 13:30:00.041",
            "2016-05-25 13:30:00.048",
            "2016-05-25 13:30:00.049",
            "2016-05-25 13:30:00.072",
            "2016-05-25 13:30:00.075",
        )
    ]
    modin_quotes = pd.DataFrame(
        {
            "time": quote_times,
            "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"],
            "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
            "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
        }
    )

    trade_times = [
        pd.Timestamp(stamp)
        for stamp in (
            "2016-05-25 13:30:00.023",
            "2016-05-25 13:30:00.038",
            "2016-05-25 13:30:00.048",
            "2016-05-25 13:30:00.048",
            "2016-05-25 13:30:00.048",
        )
    ]
    modin_trades = pd.DataFrame(
        {
            "time": trade_times,
            "ticker2": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
            "price": [51.95, 51.95, 720.77, 720.92, 98.0],
            "quantity": [75, 155, 100, 100, 100],
        }
    )

    pandas_quotes = to_pandas(modin_quotes)
    pandas_trades = to_pandas(modin_trades)

    def _assert_same_merge(**merge_kwargs):
        # Run the pandas merge first, then the ``pd`` merge, and compare.
        expected = pandas.merge_asof(pandas_quotes, pandas_trades, **merge_kwargs)
        actual = pd.merge_asof(modin_quotes, modin_trades, **merge_kwargs)
        df_equals(expected, actual)

    # left_by + right_by (grouping columns are named differently on each side)
    _assert_same_merge(on="time", left_by="ticker", right_by="ticker2")

    # Just by: give the trades frames a "ticker" column so both sides share it.
    pandas_trades["ticker"] = pandas_trades["ticker2"]
    modin_trades["ticker"] = modin_trades["ticker2"]
    _assert_same_merge(on="time", by="ticker")

    # Tolerance
    _assert_same_merge(on="time", by="ticker", tolerance=pd.Timedelta("2ms"))

    # Direction
    _assert_same_merge(on="time", by="ticker", direction="forward")

    # Allow exact matches
    _assert_same_merge(
        on="time",
        by="ticker",
        tolerance=pd.Timedelta("10ms"),
        allow_exact_matches=False,
    )
Exemplo n.º 5
0
def test_merge_asof_bad_arguments():
    """Check that invalid ``merge_asof`` argument combinations are rejected.

    Most combinations are tried against both ``pandas.merge_asof`` and
    ``pd.merge_asof``; the index/column mixing cases are tried against
    ``pd.merge_asof`` only. ``ValueError`` is expected throughout, except for
    two pandas calls where only a generic ``Exception`` is required.
    """
    left_data = {"a": [1, 5, 10], "b": [5, 7, 9]}
    right_data = {"a": [2, 3, 6], "b": [6, 5, 20]}
    pandas_left = pandas.DataFrame(left_data)
    pandas_right = pandas.DataFrame(right_data)
    modin_left = pd.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)

    # Dummy value for the keyword that conflicts with ``by`` / ``on``.
    dummy = "can't do with by"

    conflicting_keywords = [
        # Can't mix by with left_by/right_by
        {"on": "a", "by": "b", "left_by": dummy},
        {"by": "b", "on": "a", "right_by": dummy},
        # Can't mix on with left_on/right_on
        {"on": "a", "left_on": dummy},
        {"on": "a", "right_on": dummy},
    ]
    for bad_kwargs in conflicting_keywords:
        with pytest.raises(ValueError):
            pandas.merge_asof(pandas_left, pandas_right, **bad_kwargs)
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **bad_kwargs)

    # Can't mix left_index with left_on or on, similarly for right.
    # These combinations are only checked against ``pd.merge_asof``.
    index_with_column_key = [
        {"on": "a", "right_index": True},
        {"left_on": "a", "right_on": "a", "right_index": True},
        {"on": "a", "left_index": True},
        {"left_on": "a", "right_on": "a", "left_index": True},
    ]
    for bad_kwargs in index_with_column_key:
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **bad_kwargs)

    # Need both left and right.
    # For the one-sided cases pandas is only required to raise *some*
    # Exception (Pandas bug, didn't validate inputs sufficiently).
    incomplete_keys = [
        ({"left_on": "a"}, Exception),
        ({"right_on": "a"}, Exception),
        ({}, ValueError),
    ]
    for bad_kwargs, pandas_error in incomplete_keys:
        with pytest.raises(pandas_error):
            pandas.merge_asof(pandas_left, pandas_right, **bad_kwargs)
        with pytest.raises(ValueError):
            pd.merge_asof(modin_left, modin_right, **bad_kwargs)