Пример #1
0
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("key", indices_values, ids=indices_keys)
def test_get(data, key):
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    df_equals(modin_df.get(key), pandas_df.get(key))
    df_equals(modin_df.get(key, default="default"),
              pandas_df.get(key, default="default"))


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("dummy_na",
                         bool_arg_values,
                         ids=arg_keys("dummy_na", bool_arg_keys))
@pytest.mark.parametrize("drop_first",
                         bool_arg_values,
                         ids=arg_keys("drop_first", bool_arg_keys))
def test_get_dummies(request, data, dummy_na, drop_first):
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    try:
        pandas_result = pandas.get_dummies(pandas_df,
                                           dummy_na=dummy_na,
                                           drop_first=drop_first)
    except Exception as e:
        with pytest.raises(type(e)):
            pd.get_dummies(modin_df, dummy_na=dummy_na, drop_first=drop_first)
    else:
Пример #2
0
    generate_multiindex,
    test_data_diff_dtype,
)
from modin.config import NPartitions

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize("skipna",
                         bool_arg_values,
                         ids=arg_keys("skipna", bool_arg_keys))
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_all_any(data, axis, skipna, is_transposed, method):
    eval_general(
        *create_test_dfs(data),
        lambda df: getattr((df.T if is_transposed else df), method)
        (axis=axis, skipna=skipna, bool_only=None),
    )


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize("bool_only",
                         bool_arg_values,
                         ids=arg_keys("bool_only", bool_arg_keys))
def test_all_any_specific(bool_only, method):
Пример #3
0
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("key", indices_values, ids=indices_keys)
def test_get(data, key):
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    df_equals(modin_df.get(key), pandas_df.get(key))
    df_equals(
        modin_df.get(key, default="default"), pandas_df.get(key, default="default")
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
    "dummy_na", bool_arg_values, ids=arg_keys("dummy_na", bool_arg_keys)
)
@pytest.mark.parametrize(
    "drop_first", bool_arg_values, ids=arg_keys("drop_first", bool_arg_keys)
)
def test_get_dummies(request, data, dummy_na, drop_first):
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    try:
        pandas_result = pandas.get_dummies(
            pandas_df, dummy_na=dummy_na, drop_first=drop_first
        )
    except Exception as e:
        with pytest.raises(type(e)):
            pd.get_dummies(modin_df, dummy_na=dummy_na, drop_first=drop_first)
Пример #4
0
    rows_number = len(next(iter(
        data.values())))  # length of the first data column
    level_0 = np.random.choice([0, 1, 2], rows_number)
    level_1 = np.random.choice([3, 4, 5], rows_number)
    index = pd.MultiIndex.from_arrays([level_0, level_1])

    eval_general(
        pd.DataFrame(data, index=index),
        pandas.DataFrame(data, index=index),
        lambda df, *args, **kwargs: df.apply(func, *args, **kwargs),
        **func_kwargs,
    )


@pytest.mark.parametrize("column", ["A", ["A", "C"]],
                         ids=arg_keys("column", ["A", ["A", "C"]]))
@pytest.mark.parametrize("ignore_index",
                         bool_arg_values,
                         ids=arg_keys("ignore_index", bool_arg_keys))
def test_explode_single_partition(column, ignore_index):
    # This test data has two columns where some items are lists that
    # explode() should expand. In some rows, the columns have list-like
    # elements that must be expanded, and in others, they have empty lists
    # or items that aren't list-like at all.
    data = {
        "A": [[0, 1, 2], "foo", [], [3, 4]],
        "B": 1,
        "C": [["a", "b", "c"], np.nan, [], ["d", "e"]],
    }
    eval_general(
        *create_test_dfs(data),
Пример #5
0
    df_equals(
        modin_df.asof(modin_where.values[0], subset=subset),
        pandas_df.asof(pandas_where.values[0], subset=subset),
    )


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_first_valid_index(data):
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    assert modin_df.first_valid_index() == (pandas_df.first_valid_index())


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("n", int_arg_values, ids=arg_keys("n", int_arg_keys))
def test_head(data, n):
    # Test normal dataframe head
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    df_equals(modin_df.head(n), pandas_df.head(n))
    df_equals(modin_df.head(len(modin_df) + 1),
              pandas_df.head(len(pandas_df) + 1))

    # Test head when we call it from a QueryCompilerView
    modin_result = modin_df.loc[:, ["col1", "col3", "col3"]].head(n)
    pandas_result = pandas_df.loc[:, ["col1", "col3", "col3"]].head(n)
    df_equals(modin_result, pandas_result)


@pytest.mark.skip(reason="Defaulting to Pandas")
Пример #6
0
    # Named Series promoted to DF
    s = pd.Series(frame_data2.get("col1"))
    with pytest.raises(ValueError):
        modin_df.merge(s)

    s = pd.Series(frame_data2.get("col1"), name="col1")
    df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]]))

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("ascending",
                         bool_arg_values,
                         ids=arg_keys("ascending", bool_arg_keys))
@pytest.mark.parametrize("na_position", ["first", "last"],
                         ids=["first", "last"])
def test_sort_index(axis, ascending, na_position):
    data = test_data["float_nan_data"]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # Change index value so sorting will actually make a difference
    if axis == 0:
        length = len(modin_df.index)
        for df in [modin_df, pandas_df]:
            df.index = [(i - length / 2) % length for i in range(length)]

    # Add NaNs to sorted index
    for df in [modin_df, pandas_df]:
        sort_index = df.axes[axis]
Пример #7
0
    create_test_dfs,
    generate_multiindex,
    test_data_diff_dtype,
)
from modin.config import NPartitions

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize("is_transposed", [False, True])
@pytest.mark.parametrize(
    "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys)
)
@pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
@pytest.mark.parametrize("data", [test_data["float_nan_data"]])
def test_all_any(data, axis, skipna, is_transposed, method):
    eval_general(
        *create_test_dfs(data),
        lambda df: getattr((df.T if is_transposed else df), method)(
            axis=axis, skipna=skipna, bool_only=None
        ),
    )


@pytest.mark.parametrize("method", ["all", "any"])
@pytest.mark.parametrize(
    "bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys)
Пример #8
0
    # Named Series promoted to DF
    s = pd.Series(frame_data2.get("col1"))
    with pytest.raises(ValueError):
        modin_df.merge(s)

    s = pd.Series(frame_data2.get("col1"), name="col1")
    df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]]))

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "ascending", bool_arg_values, ids=arg_keys("ascending", bool_arg_keys)
)
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_index(axis, ascending, na_position):
    data = test_data["float_nan_data"]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # Change index value so sorting will actually make a difference
    if axis == 0:
        length = len(modin_df.index)
        for df in [modin_df, pandas_df]:
            df.index = [(i - length / 2) % length for i in range(length)]

    # Add NaNs to sorted index
    for df in [modin_df, pandas_df]:
        sort_index = df.axes[axis]
Пример #9
0
    int_arg_values,
    test_data,
    eval_general,
    create_test_dfs,
    test_data_diff_dtype,
)

pd.DEFAULT_NPARTITIONS = 4

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys)
)
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum(axis, skipna, method):
    eval_general(
        *create_test_dfs(test_data["float_nan_data"]),
        lambda df: getattr(df, method)(axis=axis, skipna=skipna),
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum_transposed(axis, method):
    eval_general(
        *create_test_dfs(test_data["int_data"]),
        lambda df: getattr(df.T, method)(axis=axis),
Пример #10
0
    eval_general,
    create_test_dfs,
    test_data_diff_dtype,
)
from modin.config import NPartitions

NPartitions.put(4)

# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("skipna",
                         bool_arg_values,
                         ids=arg_keys("skipna", bool_arg_keys))
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum(axis, skipna, method):
    eval_general(
        *create_test_dfs(test_data["float_nan_data"]),
        lambda df: getattr(df, method)(axis=axis, skipna=skipna),
    )


@pytest.mark.parametrize("axis", ["rows", "columns"])
@pytest.mark.parametrize("method", ["cumprod", "cummin", "cummax", "cumsum"])
def test_cumprod_cummin_cummax_cumsum_transposed(axis, method):
    eval_general(
        *create_test_dfs(test_data["int_data"]),
        lambda df: getattr(df.T, method)(axis=axis),
    )
Пример #11
0
    # Named Series promoted to DF
    s = pd.Series(frame_data2.get("col1"))
    with pytest.raises(ValueError):
        modin_df.merge(s)

    s = pd.Series(frame_data2.get("col1"), name="col1")
    df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]]))

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "ascending", bool_arg_values, ids=arg_keys("ascending", bool_arg_keys)
)
@pytest.mark.parametrize("na_position", ["first", "last"], ids=["first", "last"])
def test_sort_index(axis, ascending, na_position):
    data = test_data["float_nan_data"]
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

    # Change index value so sorting will actually make a difference
    if axis == 0:
        length = len(modin_df.index)
        for df in [modin_df, pandas_df]:
            df.index = [(i - length / 2) % length for i in range(length)]

    # Add NaNs to sorted index
    for df in [modin_df, pandas_df]:
        sort_index = df.axes[axis]