def test_object_dtype_categorical():
    """Check enum constraints and text filtering for a categorical Series.

    The Series is built from ``my_object_vals`` used both as values and as
    categories. The first two enum constraint entries of column 0 must not
    be dicts, and after selecting index 0 in the filter exactly one row
    must remain, equal to the first element of the original Series.
    """
    categorical = pd.Categorical(my_object_vals, categories=my_object_vals)
    cat_series = pd.Series(categorical)
    widget = show_grid(cat_series)

    enum_values = widget._columns[0]["constraints"]["enum"]
    for position in (0, 1):
        assert not isinstance(enum_values[position], dict)

    # First open the filter dropdown for column 0, then apply the filter.
    open_dropdown_msg = {
        "type": "show_filter_dropdown",
        "field": 0,
        "search_val": None,
    }
    filter_info = {
        "field": 0,
        "selected": [0],
        "type": "text",
        "excluded": [],
    }
    apply_filter_msg = {
        "field": 0,
        "filter_info": filter_info,
        "type": "change_filter",
    }
    widget._handle_view_msg_helper(open_dropdown_msg)
    widget._handle_view_msg_helper(apply_filter_msg)

    filtered = widget._df
    assert len(filtered) == 1
    assert filtered[0][0] == cat_series[0]
def test_crosstab():
    """Check that ``pd.crosstab`` warns about defaulting to pandas and returns a DataFrame.

    Three calls are exercised: object arrays with explicit row/column
    names, two categoricals, and the same categoricals with
    ``dropna=False``.
    """

    def check_crosstab(*args, **kwargs):
        # Each call must run inside the warning context and yield a DataFrame.
        with warns_that_defaulting_to_pandas():
            df = pd.crosstab(*args, **kwargs)
        assert isinstance(df, pd.DataFrame)

    a = np.array(["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3, dtype=object)
    b = np.array(
        ["one", "one", "one", "two"] * 2 + ["two", "two", "one"],
        dtype=object,
    )
    c_values = [
        "dull", "dull", "shiny", "dull", "dull", "shiny",
        "shiny", "dull", "shiny", "shiny", "shiny",
    ]
    c = np.array(c_values, dtype=object)
    check_crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"])

    foo = pd.Categorical(["a", "b"], categories=["a", "b", "c"])
    bar = pd.Categorical(["d", "e"], categories=["d", "e", "f"])
    check_crosstab(foo, bar)
    check_crosstab(foo, bar, dropna=False)
def get_test_data():
    """Return a dict of mixed-type sample values keyed ``"A"`` through ``"F"``.

    The values are a float scalar, a timestamp, a float32 Series of ones
    indexed 0..3, an int32 array of threes, a categorical of four labels
    and a plain list of four strings.
    """
    timestamp = pd.Timestamp("20130102")
    ones = pd.Series(1, index=list(range(4)), dtype="float32")
    threes = np.array([3] * 4, dtype="int32")
    labels = pd.Categorical(["test", "train", "foo", "bar"])
    words = ["foo", "bar", "buzz", "fox"]
    return {
        "A": 1.0,
        "B": timestamp,
        "C": ones,
        "D": threes,
        "E": labels,
        "F": words,
    }
def test_unique():
    """Compare ``pd.unique`` against ``pandas.unique`` on several input kinds.

    Inputs covered, in order: a plain list, an integer Series, a Series of
    naive timestamps, a Series and an Index of US/Eastern timestamps, and
    a Series wrapping a Categorical. Each input is built once with ``pd``
    and once with ``pandas`` so both sides use their own constructors.
    """

    def eastern_pair(lib):
        # Two identical tz-aware timestamps built with the given library.
        return [
            lib.Timestamp("20160101", tz="US/Eastern"),
            lib.Timestamp("20160101", tz="US/Eastern"),
        ]

    input_builders = (
        lambda lib: [2, 1, 3, 3],
        lambda lib: lib.Series([2] + [1] * 5),
        lambda lib: lib.Series(
            [lib.Timestamp("20160101"), lib.Timestamp("20160101")]
        ),
        lambda lib: lib.Series(eastern_pair(lib)),
        lambda lib: lib.Index(eastern_pair(lib)),
        lambda lib: lib.Series(lib.Categorical(list("baabc"))),
    )

    for build in input_builders:
        modin_result = pd.unique(build(pd))
        pandas_result = pandas.unique(build(pandas))
        assert_array_equal(modin_result, pandas_result)
def create_df():
    """Build a sample DataFrame with columns A, Date, C, D, E and F.

    The columns hold, respectively, a float scalar, a timestamp scalar,
    a float32 Series of ones indexed 0..3, an int32 array of threes,
    a four-label categorical and a list of four strings.
    """
    columns = {}
    columns["A"] = 1.0
    columns["Date"] = pd.Timestamp("20130102")
    columns["C"] = pd.Series(1, index=list(range(4)), dtype="float32")
    columns["D"] = np.array([3] * 4, dtype="int32")
    columns["E"] = pd.Categorical(["test", "train", "foo", "bar"])
    columns["F"] = ["foo", "bar", "buzz", "fox"]
    return pd.DataFrame(columns)
def test_2195(datetime_is_numeric, has_numeric_column):
    """Compare ``describe`` between ``pd`` and ``pandas`` frames with categorical and date columns.

    Parameters
    ----------
    datetime_is_numeric
        Forwarded unchanged to ``DataFrame.describe``.
    has_numeric_column
        When truthy, a ``"numeric"`` column of fives is added to the data.
    """
    row_count = 10**2
    data = {
        "categorical": pd.Categorical(["d"] * row_count),
        "date": [np.datetime64("2000-01-01")] * row_count,
    }
    if has_numeric_column:
        data["numeric"] = [5] * row_count

    # Both frames are built from the same dict so they hold identical content.
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.describe(datetime_is_numeric=datetime_is_numeric),
    )