示例#1
0
 def setup(self, self_type, value_type, shape, limit, inplace):
     """Prepare the all-NaN dataset and the fill value used by the benchmark.

     Parameters
     ----------
     self_type : str
         ``"DataFrame"`` or ``"Series"``; selects the kind of ``self.dataset``.
     value_type : str
         ``"scalar"``, ``"dict"``, ``"Series"`` or ``"DataFrame"``; selects the
         kind of ``self.value``.
     shape : sequence of int
         ``shape[0]`` is the number of rows, ``shape[1]`` the number of columns.
     limit : number or None
         Multiplied by the row count and truncated to an int to obtain
         ``self.limit``; a falsy value yields ``None``.
     inplace : object
         Not used by this method.

     Raises
     ------
     NotImplementedError
         When a ``"DataFrame"`` value is requested for a ``"Series"`` dataset.
     AssertionError
         When ``self_type`` or ``value_type`` is not one of the known names.
     """
     pd = IMPL[ASV_USE_IMPL]
     columns = [f"col{x}" for x in range(shape[1])]
     if self_type == "DataFrame":
         self.dataset = pd.DataFrame(np.nan,
                                     index=pd.RangeIndex(shape[0]),
                                     columns=columns)
     elif self_type == "Series":
         self.dataset = pd.Series(np.nan, index=pd.RangeIndex(shape[0]))
     else:
         # Raised explicitly (not via ``assert``) so the guard survives ``-O``.
         raise AssertionError(f"unsupported self_type: {self_type!r}")
     if value_type == "scalar":
         self.value = 18.19
     elif value_type == "dict":
         self.value = {k: k * 1.23 for k in range(shape[0])}
     elif value_type == "Series":
         self.value = pd.Series([k * 1.23 for k in range(shape[0])],
                                index=pd.RangeIndex(shape[0]))
     elif value_type == "DataFrame":
         if self_type == "Series":
             raise NotImplementedError
         self.value = pd.DataFrame(
             {
                 k: [i + j * 1.23 for j in range(shape[0])]
                 for i, k in enumerate(columns)
             },
             index=pd.RangeIndex(shape[0]),
             columns=columns,
         )
     else:
         # Raised explicitly (not via ``assert``) so the guard survives ``-O``.
         raise AssertionError(f"unsupported value_type: {value_type!r}")
     # ``limit`` is scaled by the number of rows; falsy means "no limit".
     self.limit = int(limit * shape[0]) if limit else None
示例#2
0
def test___setitem__partitions_aligning():
    """Compare Series-to-column assignment between Modin and pandas.

    Two scenarios are covered: a Series longer than the frame (issue #2390)
    and a frame whose datetime index contains a duplicate (issue #2442).
    """
    # from issue #2390
    frame_data = {"a": [1, 2, 3]}
    longer_values = [4, 5, 6, 7, 8]
    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)
    modin_df["b"] = pd.Series(longer_values)
    pandas_df["b"] = pandas.Series(longer_values)
    df_equals(modin_df, pandas_df)

    # from issue #2442
    # Index with duplicated timestamp
    duplicated_dates = ["2020-02-06", "2020-02-06", "2020-02-22", "2020-03-26"]
    index = pandas.to_datetime(duplicated_dates)
    md_df, pd_df = create_test_dfs({"a": [1, 2, 3, 4]}, index=index)

    # "b" is first created and then overwritten; "a" already exists.
    for column in ("b", "b", "a"):
        pd_df[column] = pandas.Series(np.arange(4))
        md_df[column] = pd.Series(np.arange(4))
        df_equals(md_df, pd_df)
示例#3
0
def test_matmul(data):
    """Compare the ``@`` operator on a Modin frame with pandas.

    Array, Series and frame operands are checked, plus two operands that
    are expected to raise ``ValueError``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    n_cols = len(modin_df.columns)

    # Array operand with one entry per column
    vector = np.arange(n_cols)
    df_equals(modin_df @ vector, pandas_df @ vector)

    # Array operand of the wrong length
    with pytest.raises(ValueError):
        _ = modin_df @ np.arange(n_cols + 10)

    # Series operand labelled with the frame's columns
    modin_series = pd.Series(np.arange(n_cols), index=modin_df.columns)
    pandas_series = pandas.Series(np.arange(n_cols), index=pandas_df.columns)
    df_equals(modin_df @ modin_series, pandas_df @ pandas_series)

    # Frame operand
    df_equals(modin_df @ modin_df.T, pandas_df @ pandas_df.T)

    # Series operand whose index doesn't line up with the columns
    with pytest.raises(ValueError):
        _ = modin_df @ pd.Series(np.arange(n_cols))
示例#4
0
def test_compare(align_axis, keep_shape, keep_equal):
    """Compare ``compare()`` results of Modin and pandas in both directions.

    Runs once on two random 100x10 frames and once on two short string
    Series, forwarding the three parameters as keyword arguments.
    """
    kwargs = dict(
        align_axis=align_axis,
        keep_shape=keep_shape,
        keep_equal=keep_equal,
    )

    def assert_same_compare(md_left, md_right, pd_left, pd_right):
        # Convert the Modin result to pandas and require exact equality.
        modin_result = md_left.compare(md_right, **kwargs)
        pandas_result = pd_left.compare(pd_right, **kwargs)
        assert to_pandas(modin_result).equals(pandas_result)

    letters = "abcdefghij"
    frame_data1 = random_state.randn(100, 10)
    frame_data2 = random_state.randn(100, 10)
    pandas_df = pandas.DataFrame(frame_data1, columns=list(letters))
    pandas_df2 = pandas.DataFrame(frame_data2, columns=list(letters))
    modin_df = pd.DataFrame(frame_data1, columns=list(letters))
    modin_df2 = pd.DataFrame(frame_data2, columns=list(letters))

    assert_same_compare(modin_df, modin_df2, pandas_df, pandas_df2)
    assert_same_compare(modin_df2, modin_df, pandas_df2, pandas_df)

    series_data1 = ["a", "b", "c", "d", "e"]
    series_data2 = ["a", "a", "c", "b", "e"]
    pandas_series1 = pandas.Series(series_data1)
    pandas_series2 = pandas.Series(series_data2)
    modin_series1 = pd.Series(series_data1)
    modin_series2 = pd.Series(series_data2)

    assert_same_compare(modin_series1, modin_series2, pandas_series1, pandas_series2)
    assert_same_compare(modin_series2, modin_series1, pandas_series2, pandas_series1)
示例#5
0
def create_test_series(vals):
    """Build a ``pd.Series`` and a ``pandas.Series`` from the same input.

    When ``vals`` is a dict, only the value stored under its first key is
    used; any other input is passed to both constructors unchanged.

    Returns
    -------
    tuple
        ``(modin_series, pandas_series)``.
    """
    if isinstance(vals, dict):
        first_key = next(iter(vals))
        source = vals[first_key]
    else:
        source = vals
    return pd.Series(source), pandas.Series(source)
示例#6
0
def test_unique():
    """Compare ``pd.unique`` with ``pandas.unique`` over several inputs.

    Each builder below receives the library module (``pd`` or ``pandas``)
    and returns the input built with that library, so both sides get
    equivalent objects of their own kind.
    """

    def eastern_stamps(lib):
        return [
            lib.Timestamp("20160101", tz="US/Eastern"),
            lib.Timestamp("20160101", tz="US/Eastern"),
        ]

    input_builders = (
        # plain list
        lambda lib: [2, 1, 3, 3],
        # integer Series
        lambda lib: lib.Series([2] + [1] * 5),
        # Series of naive timestamps
        lambda lib: lib.Series(
            [lib.Timestamp("20160101"), lib.Timestamp("20160101")]
        ),
        # Series of tz-aware timestamps
        lambda lib: lib.Series(eastern_stamps(lib)),
        # Index of tz-aware timestamps
        lambda lib: lib.Index(eastern_stamps(lib)),
        # categorical Series
        lambda lib: lib.Series(lib.Categorical(list("baabc"))),
    )

    for build in input_builders:
        modin_result = pd.unique(build(pd))
        pandas_result = pandas.unique(build(pandas))
        assert_array_equal(modin_result, pandas_result)
示例#7
0
def test_to_datetime():
    """Compare ``pd.to_datetime`` with ``pandas.to_datetime``.

    Inputs: a year/month/day frame, a Series of date strings, two integer
    epochs with an explicit unit, and a list of day offsets from an origin.
    """
    # DataFrame input for to_datetime
    ymd_columns = {"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}
    modin_df = pd.DataFrame(ymd_columns)
    pandas_df = pandas.DataFrame(ymd_columns)
    df_equals(pd.to_datetime(modin_df), pandas.to_datetime(pandas_df))

    # Series input for to_datetime
    date_strings = ["3/11/2000", "3/12/2000", "3/13/2000"] * 1000
    modin_s = pd.Series(date_strings)
    pandas_s = pandas.Series(date_strings)
    df_equals(pd.to_datetime(modin_s), pandas.to_datetime(pandas_s))

    # Other inputs for to_datetime
    for value, unit in ((1490195805, "s"), (1490195805433502912, "ns")):
        assert pd.to_datetime(value, unit=unit) == pandas.to_datetime(
            value, unit=unit
        )

    day_offsets = [1, 2, 3]
    modin_result = pd.to_datetime(
        day_offsets, unit="D", origin=pd.Timestamp("2000-01-01")
    )
    pandas_result = pandas.to_datetime(
        day_offsets, unit="D", origin=pandas.Timestamp("2000-01-01")
    )
    assert modin_result.equals(pandas_result)
示例#8
0
def test_assign():
    """Compare ``DataFrame.assign`` between Modin and pandas.

    The new columns are Series built from the frame's own first and second
    columns; one call adds a single column, the next adds two.
    """
    data = test_data_values[0]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # One new column
    df_equals(
        modin_df.assign(new_column=pd.Series(modin_df.iloc[:, 0])),
        pandas_df.assign(new_column=pandas.Series(pandas_df.iloc[:, 0])),
    )

    # Two new columns in a single call
    modin_two = modin_df.assign(
        new_column=pd.Series(modin_df.iloc[:, 0]),
        new_column2=pd.Series(modin_df.iloc[:, 1]),
    )
    pandas_two = pandas_df.assign(
        new_column=pandas.Series(pandas_df.iloc[:, 0]),
        new_column2=pandas.Series(pandas_df.iloc[:, 1]),
    )
    df_equals(modin_two, pandas_two)
示例#9
0
    def setup(self, value_type, shape, limit):
        """Prepare the NaN frame and the keyword arguments for the benchmark.

        Parameters
        ----------
        value_type : str
            ``"scalar"``, ``"dict"``, ``"Series"`` or ``"DataFrame"``; selects
            the kind of ``self.value``.
        shape : sequence of int
            Unpacked into ``gen_nan_data``; ``shape[0]`` is also used as the
            row count for the ``"DataFrame"`` value and for scaling ``limit``.
        limit : number or None
            Multiplied by ``shape[0]`` and truncated to an int; a falsy value
            yields ``None``.

        Raises
        ------
        AssertionError
            When ``value_type`` is not one of the known names.
        """
        pd = IMPL[ASV_USE_IMPL]
        self.df = gen_nan_data(ASV_USE_IMPL, *shape)
        columns = self.df.columns

        if value_type == "scalar":
            self.value = 18.19
        elif value_type == "dict":
            self.value = {k: i * 1.23 for i, k in enumerate(columns)}
        elif value_type == "Series":
            self.value = pd.Series([i * 1.23 for i in range(len(columns))],
                                   index=columns)
        elif value_type == "DataFrame":
            self.value = pd.DataFrame(
                {
                    k: [i + j * 1.23 for j in range(shape[0])]
                    for i, k in enumerate(columns)
                },
                index=pd.RangeIndex(shape[0]),
                columns=columns,
            )
        else:
            # Raised explicitly (not via ``assert``) so the guard survives ``-O``.
            raise AssertionError(f"unsupported value_type: {value_type!r}")
        # ``limit`` is scaled by the number of rows; falsy means "no limit".
        limit = int(limit * shape[0]) if limit else None
        self.kw = {"value": self.value, "limit": limit}
示例#10
0
 def test_apply_on_empty_modin_series(self):
     """Apply ``math_foo`` to an empty Series directly and through swifter.

     The two results are then passed to ``assertEqual``.
     """
     LOG.info("test_apply_on_empty_series")
     modin_pandas = self.modinSetUp()
     empty_series = modin_pandas.Series()
     plain_result = empty_series.apply(math_foo, compare_to=1)
     swifter_result = empty_series.swifter.apply(math_foo, compare_to=1)
     self.assertEqual(plain_result, swifter_result)  # equality test
示例#11
0
def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id):
    """Assign ``value`` under ``new_col_id`` in both frames and compare them.

    When ``convert_to_series`` is truthy the value is wrapped in the
    matching library's ``Series`` first; otherwise it is assigned as is.
    """
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    if convert_to_series:
        pandas_df[new_col_id] = pandas.Series(value)
        modin_df[new_col_id] = pd.Series(value)
    else:
        pandas_df[new_col_id] = value
        modin_df[new_col_id] = value

    df_equals(modin_df, pandas_df)
示例#12
0
def test_object_dtype_categorical():
    """Filter a grid built from a categorical Series of ``my_object_vals``.

    Checks that the first two enum constraints of column 0 are not dicts,
    then opens the filter dropdown, selects entry 0 and expects a single
    remaining row equal to the first element of the Series.
    """
    categorical = pd.Categorical(my_object_vals, categories=my_object_vals)
    cat_series = pd.Series(categorical)
    widget = show_grid(cat_series)

    constraints_enum = widget._columns[0]["constraints"]["enum"]
    for position in (0, 1):
        assert not isinstance(constraints_enum[position], dict)

    open_dropdown_msg = {
        "type": "show_filter_dropdown",
        "field": 0,
        "search_val": None,
    }
    widget._handle_view_msg_helper(open_dropdown_msg)

    select_first_msg = {
        "field": 0,
        "filter_info": {
            "field": 0,
            "selected": [0],
            "type": "text",
            "excluded": [],
        },
        "type": "change_filter",
    }
    widget._handle_view_msg_helper(select_first_msg)

    assert len(widget._df) == 1
    assert widget._df[0][0] == cat_series[0]
示例#13
0
def test_add_row_button():
    """Send an ``add_row`` message and check the event and the new row.

    Expects exactly one ``row_added`` event for index 4 with source "gui",
    and the row stored at that index to match the expected values.
    """
    widget = SpreadsheetWidget(df=create_df())
    event_history = init_event_history("row_added", widget=widget)

    widget._handle_view_msg_helper({"type": "add_row"})

    expected_event = {"name": "row_added", "index": 4, "source": "gui"}
    assert event_history == [expected_event]

    # make sure the added row in the internal dataframe contains the
    # expected values
    added_index = event_history[0]["index"]
    expected_values = pd.Series(
        {
            "modin_spreadsheet_unfiltered_index": 4,
            "A": 1,
            "C": 1,
            "D": 3,
            "Date": pd.Timestamp("2013-01-02 00:00:00"),
            "E": "bar",
            "F": "fox",
        }
    )
    # Reorder the expectation to the label order of the stored row.
    row_labels = widget._df.loc[added_index].index
    matches = widget._df.loc[added_index] == expected_values[row_labels]
    assert matches.all()
示例#14
0
def test_astype():
    """Compare ``DataFrame.astype`` between Modin and pandas.

    Covers whole-frame casts (int32, float64, str, category), per-column
    casts via a dict, a dict literal with a repeated key, a cast of a
    one-cell frame, a missing column name, and a dtype Series whose index
    is not unique.
    """
    # Take four columns of the integer test data and rebuild both frames
    # from the same raw values, index and column labels.
    td = pandas.DataFrame(
        test_data["int_data"])[["col1", "index", "col3", "col4"]]
    modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
    expected_df = pandas.DataFrame(td.values,
                                   index=td.index,
                                   columns=td.columns)

    # Whole-frame casts
    modin_df_casted = modin_df.astype(np.int32)
    expected_df_casted = expected_df.astype(np.int32)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype(np.float64)
    expected_df_casted = expected_df.astype(np.float64)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype(str)
    expected_df_casted = expected_df.astype(str)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype("category")
    expected_df_casted = expected_df.astype("category")
    df_equals(modin_df_casted, expected_df_casted)

    # Per-column casts; "col4" is left out of the mapping
    dtype_dict = {"col1": np.int32, "index": np.int64, "col3": str}
    modin_df_casted = modin_df.astype(dtype_dict)
    expected_df_casted = expected_df.astype(dtype_dict)
    df_equals(modin_df_casted, expected_df_casted)

    # Ignore lint because this is testing bad input
    # (the literal repeats the "index" key, so only the last entry, str,
    # is kept in the resulting dict)
    bad_dtype_dict = {
        "index": np.int32,
        "index": np.int64,
        "index": str
    }  # noqa F601
    modin_df_casted = modin_df.astype(bad_dtype_dict)
    expected_df_casted = expected_df.astype(bad_dtype_dict)
    df_equals(modin_df_casted, expected_df_casted)

    # One-cell frame created without data, filled through chained
    # indexing, then cast to int. From here on ``modin_df`` and
    # ``expected_df`` refer to these small frames.
    modin_df = pd.DataFrame(index=["row1"], columns=["col1"])
    modin_df["col1"]["row1"] = 11
    modin_df_casted = modin_df.astype(int)
    expected_df = pandas.DataFrame(index=["row1"], columns=["col1"])
    expected_df["col1"]["row1"] = 11
    expected_df_casted = expected_df.astype(int)
    df_equals(modin_df_casted, expected_df_casted)

    # A column name that is not in the frame must raise KeyError
    with pytest.raises(KeyError):
        modin_df.astype({"not_exists": np.uint8})

    # The dtypes series must have a unique index.
    # The lambda builds the dtype Series with the library matching the
    # frame it receives (Modin for a Modin frame, pandas otherwise).
    eval_general(
        modin_df,
        expected_df,
        lambda df: df.astype(
            pd.Series([str, str], index=["col1", "col1"])
            if isinstance(df, pd.DataFrame) else pandas.Series(
                [str, str], index=["col1", "col1"])),
    )
示例#15
0
 def applyier(df):
     """Assign ``value`` under ``new_col_id`` in ``df`` and return ``df``.

     When ``convert_to_series`` is truthy the value is wrapped in a Series
     of the library matching ``df`` (pandas for a pandas frame, ``pd``
     otherwise) before the assignment.
     """
     if not convert_to_series:
         new_column = value
     elif isinstance(df, pandas.DataFrame):
         new_column = pandas.Series(value)
     else:
         new_column = pd.Series(value)
     df[new_col_id] = new_column
     return df
示例#16
0
def test_concat_series_only():
    """Concatenate a 1000-element Series with itself in Modin and pandas."""
    modin_series = pd.Series(list(range(1000)))
    pandas_series = pandas.Series(list(range(1000)))

    modin_result = pd.concat([modin_series, modin_series])
    pandas_result = pandas.concat([pandas_series, pandas_series])
    df_equals(modin_result, pandas_result)
示例#17
0
def test_asfreq():
    """Check that ``DataFrame.asfreq`` emits a ``UserWarning``."""
    minute_index = pd.date_range("1/1/2000", periods=4, freq="T")
    values = pd.Series([0.0, None, 2.0, 3.0], index=minute_index)
    df = pd.DataFrame({"s": values})
    # We are only testing that this defaults to pandas, so the warning is
    # the only thing checked; the returned frame is ignored.
    with pytest.warns(UserWarning):
        df.asfreq(freq="30S")
示例#18
0
def test___setitem__with_mismatched_partitions():
    """Assign a full-length Series to a frame read from a large CSV file.

    A 200,000 x 99 table of random integers is written to disk, read back
    with both libraries, extended with a ``"new"`` column holding
    ``0..len-1`` and the two frames are compared.

    The CSV lives in a temporary directory so that the test leaves no file
    behind (also when it fails) and cannot collide with a concurrent run.
    """
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        fname = os.path.join(tmp_dir, "200kx99.csv")
        np.savetxt(
            fname, np.random.randint(0, 100, size=(200_000, 99)), delimiter=","
        )
        modin_df = pd.read_csv(fname)
        pandas_df = pandas.read_csv(fname)
        modin_df["new"] = pd.Series(list(range(len(modin_df))))
        pandas_df["new"] = pandas.Series(list(range(len(pandas_df))))
        # Compare inside the ``with`` block, while the file still exists.
        df_equals(modin_df, pandas_df)
示例#19
0
def test_constructor(data):
    """Build frames from ``data`` directly and from a dict of Series."""
    # Straight from the raw data
    df_equals(pandas.DataFrame(data), pd.DataFrame(data))

    # From a mapping whose values were first turned into Series
    pandas_columns = {k: pandas.Series(v) for k, v in data.items()}
    pandas_df = pandas.DataFrame(pandas_columns)
    modin_columns = {k: pd.Series(v) for k, v in data.items()}
    modin_df = pd.DataFrame(modin_columns)
    df_equals(pandas_df, modin_df)
示例#20
0
def test_frame_fillna_limit(data, limit):
    """Compare ``fillna(..., limit=limit)`` between Modin and pandas.

    For a frame that keeps only its first two rows (then only its last two
    rows) after being reindexed to the full index, the test fills with a
    method ("pad" / "backfill"), a dict, a Series and a DataFrame.

    ``limit`` may be ``None``, an int (negative values are counted from the
    frame length) or a float (treated as a fraction of the frame length).
    """
    pandas_df = pandas.DataFrame(data)

    # Replacement values: the column labels in shuffled order, as a Series
    # and as a dict ...
    replace_pandas_series = pandas_df.columns.to_series().sample(frac=1)
    replace_dict = replace_pandas_series.to_dict()
    # ... and a frame whose every column holds the index values, with its
    # rows shuffled.
    replace_pandas_df = pandas.DataFrame(
        {col: pandas_df.index.to_series()
         for col in pandas_df.columns},
        index=pandas_df.index,
    ).sample(frac=1)
    # Modin counterparts are built from the already shuffled pandas
    # objects, so both sides use the same ordering.
    replace_modin_series = pd.Series(replace_pandas_series)
    replace_modin_df = pd.DataFrame(replace_pandas_df)

    # Keep the first two rows and reindex back to the full index.
    index = pandas_df.index
    result = pandas_df[:2].reindex(index)
    modin_df = pd.DataFrame(result)

    # Normalise ``limit`` to an absolute count.
    if isinstance(limit, float):
        limit = int(len(modin_df) * limit)
    if limit is not None and limit < 0:
        limit = len(modin_df) + limit

    df_equals(
        modin_df.fillna(method="pad", limit=limit),
        result.fillna(method="pad", limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_dict, limit=limit),
        result.fillna(replace_dict, limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_modin_series, limit=limit),
        result.fillna(replace_pandas_series, limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_modin_df, limit=limit),
        result.fillna(replace_pandas_df, limit=limit),
    )

    # Same checks with only the last two rows kept, filling backwards.
    result = pandas_df[-2:].reindex(index)
    modin_df = pd.DataFrame(result)
    df_equals(
        modin_df.fillna(method="backfill", limit=limit),
        result.fillna(method="backfill", limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_dict, limit=limit),
        result.fillna(replace_dict, limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_modin_series, limit=limit),
        result.fillna(replace_pandas_series, limit=limit),
    )
    df_equals(
        modin_df.fillna(replace_modin_df, limit=limit),
        result.fillna(replace_pandas_df, limit=limit),
    )
示例#21
0
def get_test_data():
    """Return a dict of six differently typed column values keyed "A".."F".

    The entries are a float scalar, a timestamp, a float32 Series, an
    int32 array, a categorical and a list of strings; the non-scalar
    ones have four elements each.
    """
    float_column = pd.Series(1, index=list(range(4)), dtype="float32")
    int_column = np.array([3] * 4, dtype="int32")
    category_column = pd.Categorical(["test", "train", "foo", "bar"])
    return dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=float_column,
        D=int_column,
        E=category_column,
        F=["foo", "bar", "buzz", "fox"],
    )
示例#22
0
    def test_modin_series_errors_on_missing_transformations(self):
        """Expect ``NotImplementedError`` from swifter ``rolling``/``resample``.

        Both accessors are called with the argument ``1`` on an empty Series.
        """
        LOG.info("test_modin_series_errors_on_missing_transformations")
        modin_pandas = self.modinSetUp()
        series = modin_pandas.Series()
        for transformation in ("rolling", "resample"):
            with self.assertRaises(NotImplementedError):
                getattr(series.swifter, transformation)(1)
示例#23
0
def test_concat_with_empty_frame():
    """Concatenate an empty frame with a two-element Series in both libraries."""
    modin_parts = [pd.DataFrame(), pd.Series({0: "a", 1: "b"})]
    pandas_parts = [pandas.DataFrame(), pandas.Series({0: "a", 1: "b"})]
    df_equals(pd.concat(modin_parts), pandas.concat(pandas_parts))
def calculateAuthorRanking(stars_list, authors_loves, authors_views,
                           nFollowers, shared):
    """Compute a per-entry ranking as ``(stars + followers + loves + views) / shared``.

    Every argument is converted to a Series first, so the arithmetic is
    element-wise. NaN results and positive or negative infinities are
    replaced by 0.

    Returns
    -------
    list
        The ranking values as a plain Python list.
    """
    import modin.pandas as pd
    import numpy as np

    stars = pd.Series(stars_list)
    loves = pd.Series(authors_loves)
    views = pd.Series(authors_views)
    followers = pd.Series(nFollowers)
    share_counts = pd.Series(shared)

    # The summation order is kept as is: floating-point addition is not
    # associative.
    engagement = stars + followers + loves + views
    ranking = engagement / share_counts
    cleaned = ranking.fillna(0).replace([np.inf, -np.inf], 0)
    return cleaned.tolist()
示例#25
0
def test_get_dummies():
    """Exercise ``pd.get_dummies`` on Series, list, frame and scalar inputs.

    Frame inputs are compared against pandas (values, columns and shape);
    the other inputs are only required to emit a ``UserWarning``.
    ``sparse=True`` on a frame must raise ``NotImplementedError``.
    """

    def expect_warning(call):
        # ``call`` runs inside the context so that any object it builds is
        # created while warnings are being recorded.
        with pytest.warns(UserWarning):
            call()

    def assert_same_dummies(modin_result, pandas_result):
        df_equals(modin_result, pandas_result)
        assert modin_result._to_pandas().columns.equals(pandas_result.columns)
        assert modin_result.shape == pandas_result.shape

    s = pd.Series(list("abca"))
    expect_warning(lambda: pd.get_dummies(s))

    s1 = ["a", "b", np.nan]
    expect_warning(lambda: pd.get_dummies(s1))
    expect_warning(lambda: pd.get_dummies(s1, dummy_na=True))

    data = {"A": ["a", "b", "a"], "B": ["b", "a", "c"], "C": [1, 2, 3]}
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    assert_same_dummies(
        pd.get_dummies(modin_df, prefix=["col1", "col2"]),
        pandas.get_dummies(pandas_df, prefix=["col1", "col2"]),
    )

    assert_same_dummies(
        pd.get_dummies(pd.DataFrame(pd.Series(list("abcdeabac")))),
        pandas.get_dummies(pandas.DataFrame(pandas.Series(list("abcdeabac")))),
    )

    with pytest.raises(NotImplementedError):
        pd.get_dummies(modin_df, prefix=["col1", "col2"], sparse=True)

    expect_warning(lambda: pd.get_dummies(pd.Series(list("abcaa"))))
    expect_warning(
        lambda: pd.get_dummies(pd.Series(list("abcaa")), drop_first=True)
    )
    expect_warning(lambda: pd.get_dummies(pd.Series(list("abc")), dtype=float))
    expect_warning(lambda: pd.get_dummies(1))
示例#26
0
def test_dot(data):
    """Compare ``DataFrame.dot`` between Modin and pandas.

    Array, Series and frame operands are checked, two operands that are
    expected to raise ``ValueError``, and two small-shape frame products.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    n_cols = len(modin_df.columns)

    # Array operand with one entry per column
    vector = np.arange(n_cols)
    df_equals(modin_df.dot(vector), pandas_df.dot(vector))

    # Array operand of the wrong length
    with pytest.raises(ValueError):
        _ = modin_df.dot(np.arange(n_cols + 10))

    # Series operand labelled with the frame's columns
    modin_series = pd.Series(np.arange(n_cols), index=modin_df.columns)
    pandas_series = pandas.Series(np.arange(n_cols), index=pandas_df.columns)
    df_equals(modin_df.dot(modin_series), pandas_df.dot(pandas_series))

    # Frame operand
    df_equals(modin_df.dot(modin_df.T), pandas_df.dot(pandas_df.T))

    # Series operand whose index doesn't line up with the columns
    with pytest.raises(ValueError):
        _ = modin_df.dot(pd.Series(np.arange(n_cols)))

    # Left frame of size (n x 1), right frame of size (1 x n)
    modin_column = pd.DataFrame(modin_series)
    pandas_column = pandas.DataFrame(pandas_series)
    df_equals(
        modin_column.dot(modin_column.T),
        pandas_column.dot(pandas_column.T),
    )

    # Left frame of size (1 x 1), right frame of size (1 x n)
    df_equals(
        pd.DataFrame([1]).dot(modin_column.T),
        pandas.DataFrame([1]).dot(pandas_column.T),
    )
示例#27
0
def test_aligning_blocks():
    """Reproduce the scenario from issue #2322 and build the frame's repr.

    The test makes no assertion; it only has to complete without raising.
    """
    # Test problem when modin frames have the same number of rows, but different
    # blocks (partition.list_of_blocks). See #2322 for details
    accm = pd.DataFrame(["-22\n"] * 162)
    # Drop the first two rows and renumber the remaining ones in place.
    accm = accm.iloc[2:, :]
    accm.reset_index(drop=True, inplace=True)
    # Assign a 145-element Series as a new column.
    accm["T"] = pd.Series(["24.67\n"] * 145)

    # see #2322 for details
    repr(accm)
示例#28
0
def test_to_numeric(data, errors, downcast):
    """Compare ``to_numeric`` on a Series between Modin and pandas.

    ``errors`` and ``downcast`` are forwarded unchanged to both calls.
    """
    modin_series = pd.Series(data)
    pandas_series = pandas.Series(data)
    options = {"errors": errors, "downcast": downcast}
    df_equals(
        pd.to_numeric(modin_series, **options),
        pandas.to_numeric(pandas_series, **options),
    )
示例#29
0
def test_notnull(data):
    """Check that ``pd.notnull`` agrees with ``pandas.notnull``.

    Covers a frame built from ``data``, a Series containing NaN and a
    scalar NaN.
    """
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    pandas_result = pandas.notnull(pandas_df)
    modin_result = pd.notnull(modin_df)
    df_equals(modin_result, pandas_result)

    modin_result = pd.notnull(pd.Series([1, np.nan, 2]))
    pandas_result = pandas.notnull(pandas.Series([1, np.nan, 2]))
    df_equals(modin_result, pandas_result)

    # Scalar input must go through the function under test (notnull) too.
    assert pd.notnull(np.nan) == pandas.notnull(np.nan)
示例#30
0
def test_series_dt_index(closed):
    """Compare time-based rolling on a datetime-indexed Series.

    A "3s" window with the given ``closed`` value is applied to twelve
    values at minute frequency; several aggregations are compared.
    """
    index = pandas.date_range("1/1/2000", periods=12, freq="T")
    pandas_series = pandas.Series(range(12), index=index)
    modin_series = pd.Series(range(12), index=index)

    pandas_rolled = pandas_series.rolling("3s", closed=closed)
    modin_rolled = modin_series.rolling("3s", closed=closed)

    aggregations = (
        lambda rolled: rolled.count(),
        lambda rolled: rolled.skew(),
        lambda rolled: rolled.apply(np.sum, raw=True),
        lambda rolled: rolled.aggregate(np.sum),
        lambda rolled: rolled.quantile(0.1),
    )
    for aggregate in aggregations:
        df_equals(aggregate(modin_rolled), aggregate(pandas_rolled))