def test():
    """Check that ``pd.Series`` / ``pd.DataFrame`` columns work with ``np`` calls.

    Covers three paths: converting a series with ``np.asarray``, adding a
    series to a plain array with ``np.add``, and adding two dataframe columns
    with ``np.add``. Each result is compared element-wise with
    ``np.array_equal``.
    """
    first = np.array([1, 2, 3])
    second = np.array([4, 5, 6])
    expected_sum = first + second

    # Series -> array conversion keeps the values.
    series = pd.Series([1, 2, 3])
    assert np.array_equal(np.asarray(series), first)

    # Mixed series/array operands.
    assert np.array_equal(np.add(series, second), expected_sum)

    # Both operands are columns pulled out of a dataframe.
    frame = pd.DataFrame({"x": first, "y": second})
    assert np.array_equal(np.add(frame["x"], frame["y"]), expected_sum)
"2000-01-11 11:00:00", "2000-01-12", "2000-01-13", "2000-01-14", "2000-01-15", "2000-01-16", "2000-01-17", "2000-01-18", "2000-01-19", "2000-01-20", "2000-01-21", "2000-01-22", "2000-01-23", ] s = pd.Series(s, dtype=pd.StringDtype()) ls = lp.Series(s) date_s = pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S") date_ls = lp.to_datetime(ls, format="%Y-%m-%d %H:%M:%S") int_s = date_s.astype(np.int64) int_ls = date_ls.astype(np.int64) assert int_ls.equals(int_s) str_s = date_s.astype("string") str_ls = date_ls.astype("string") assert str_ls.equals(str_s) date_s = str_s.astype("datetime64[ns]")
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import legate.numpy as np

from legate import pandas as lp

# Inspect what a three-element integer series exposes through
# ``__legate_data_interface__``.
s1 = lp.Series([1, 2, 3])
data1 = s1.__legate_data_interface__

assert data1["version"] == 1

# Exactly one column is expected, so the loop below runs once.
columns = data1["data"]
assert len(columns) == 1

for field, array in columns.items():
    # Field metadata of the single exported column.
    assert field.name == "column0"
    assert str(field.type) == "int64"
    assert not field.nullable

    # Two store slots are exposed and the first one is empty.
    # NOTE(review): presumably slot 0 is the null mask, which a
    # non-nullable column does not have — confirm.
    stores = array.stores()
    assert len(stores) == 2
    assert stores[0] is None

# The series must also convert to an array holding the same values.
arr1 = np.array(s1)
arr2 = np.array([1, 2, 3])
assert np.array_equal(arr1, arr2)
# import pandas as pd from numpy.random import permutation from legate import pandas as lp from tests.utils import equals, equals_scalar for index in [ pd.RangeIndex(1, 21, 2), pd.RangeIndex(21, 1, -2), pd.Index(permutation(10)), ]: print(f"Index: {index}") sr = pd.Series(range(10), index=index) lsr = lp.Series(range(10), index=index) assert equals(lsr.loc[:], sr.loc[:]) assert equals_scalar(lsr.loc[index[0]], sr.loc[index[0]]) assert equals(lsr.loc[index[3]:index[-4]], sr.loc[index[3]:index[-4]]) assert equals(lsr.loc[:index[-3]], sr.loc[:index[-3]]) assert equals(lsr.loc[index[2]:], sr.loc[index[2]:]) pd_mask = sr % 3 == 0 lp_mask = mask = lsr % 3 == 0 for mask in [pd_mask, pd_mask.values, pd_mask.to_list(), lp_mask]: assert equals(lsr.loc[mask], sr.loc[pd_mask]) sr.loc[pd_mask] = sr.loc[pd_mask] + 100
def similar(a, b): return ((a - b).abs() < 1e-14).all() for n in [100]: s1 = pd.Series(randn(1, n)[0]) s2 = pd.Series(randn(1, n)[0]) for i in range(n): if (i + 1) % 4 == 0: s1[i] = nan if (i + 1) % 3 == 0: s2[i] = nan ls1 = lp.Series(s1) ls2 = lp.Series(s2) ops = [ "add", "sub", "mul", "div", "truediv", "floordiv", "mod", # TODO: nans_to_nulls is required to match the pandas result # "pow", ] for op in ops:
from tests.utils import equals n = 10 sr = pd.Series( np.array(randn(1, n)[0] * 100.0, dtype="int64"), name="A", index=pd.MultiIndex.from_arrays( [ np.array(permutation(n), dtype="int64"), np.array(permutation(n), dtype="int64"), np.array(permutation(n), dtype="int64"), ], names=("k1", "k2", "k3"), ), ) lsr = lp.Series(sr) for lvl in [["k1"], [1, "k1"], [0, 2]]: print(f"level={lvl}, op=sum") out_sr = sr.groupby(level=lvl, sort=True).sum() out_lsr = lsr.groupby(level=lvl, sort=True).sum() assert equals(out_lsr, out_sr) agg = ["sum", "max"] print(f"level={lvl}, op={agg}") out_sr = sr.groupby(level=lvl, sort=True).agg(agg) out_lsr = lsr.groupby(level=lvl, sort=True).agg(agg) # Pandas ignores the value of as_index in fantastic ways # (GH #13217) and rearranging its output to match with Legate # is really painful. After all my attempts to make a sensible
#

import pandas as pd

from legate import pandas as lp
from tests.utils import equals, must_fail


def _test(ex, sr, *args):
    """Hand ``must_fail`` a thunk that reads ``sr.iloc[args]``.

    ``args`` is forwarded to ``iloc`` as a tuple, exactly as collected by
    ``*args``. ``ex`` is passed through to ``must_fail`` unchanged
    (presumably the exception type the read is expected to raise).
    """
    must_fail(ex, lambda: sr.iloc[args])


n = 17

# The same labels 3..n+2, once as a RangeIndex and once as a materialized
# Index.
index_variants = [pd.RangeIndex(3, n + 3), pd.Index(list(range(3, n + 3)))]

for index in index_variants:
    sr_copy = lp.Series(range(n), index=index)
    sr = lp.Series(range(n), index=index)

    # Position n is one past the last valid position (n - 1).
    past_end = slice(n, n + 1)

    # A scalar read there must fail; a slice starting there is just empty.
    _test(KeyError, sr, n)
    assert len(sr.iloc[past_end]) == 0

    # Writes to the same out-of-range position must leave the series equal
    # to its untouched twin, both for a scalar position and for a slice.
    sr.iloc[n] = 100
    assert equals(sr_copy, sr)

    sr.iloc[past_end] = 200
    assert equals(sr_copy, sr)
# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Use the Legate random generator when it is installed, otherwise NumPy's.
try:
    from legate.numpy.random import random
except ModuleNotFoundError:
    from numpy.random import random

import pandas as pd

from legate import pandas as lp

# A Legate series built straight from the random values must equal one
# built from a pandas series that wraps the same values.
values = random(100000)
pandas_series = pd.Series(values)
legate_series = lp.Series(values)
from_pandas = lp.Series(pandas_series)
assert legate_series.equals(from_pandas)
# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from legate.core import LEGATE_MAX_FIELDS

from legate import pandas as lp

s = lp.Series([1, 2, 3, 4])

# Derive LEGATE_MAX_FIELDS new series from ``s`` and keep every one of them
# referenced by the list.
# NOTE(review): presumably this exercises allocating more live columns than
# fit in a single field space — confirm.
all_series = [s + 1 for _ in range(LEGATE_MAX_FIELDS)]
def _test(ex, sr, *args):
    """Hand ``must_fail`` a thunk that reads ``sr.at[args]``.

    ``args`` reaches ``at`` as the tuple collected by ``*args``. ``ex`` is
    passed through to ``must_fail`` unchanged (presumably the exception type
    the access is expected to raise).
    """

    def _read_at():
        sr.at[args]

    must_fail(ex, _read_at)


for index in [
    pd.RangeIndex(10),
    pd.RangeIndex(1, 21, 2),
    pd.RangeIndex(21, 1, -2),
    pd.Index(permutation(10)),
]:
    print(f"Index: {index}")
    sr = pd.Series(range(10), index=index)
    lsr = lp.Series(sr)

    # Scalar reads through .at must agree for the labels at positions 3..7.
    for idx in range(3, 8):
        label = index[idx]
        print(f"Testing lsr.at[{label}].__getitem__")
        out_pd = sr.at[label]
        out_lp = lsr.at[label]
        assert equals_scalar(out_lp, out_pd)

    # Scalar writes through .at, applied to both series, must read back the
    # same for the labels at positions 3..5.
    for idx, val in enumerate([100, 200, 300], start=3):
        label = index[idx]
        print(f"Testing lsr.at[{label}].__setitem__")
        sr.at[label] = val
        lsr.at[label] = val
        out_pd = sr.at[label]
        out_lp = lsr.at[label]
        assert equals_scalar(out_lp, out_pd)
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pandas as pd
from numpy.random import permutation, randn

from legate import pandas as lp
from tests.utils import equals

n = 17
indices = [pd.RangeIndex(1, n + 1), pd.Index(permutation(n))]

for index in indices:
    print(f"Index: {index}")
    s1 = pd.Series(randn(n), index=index)
    ls1 = lp.Series(s1)
    s2 = pd.Series(randn(n), index=index)

    # Adding a pandas series, its underlying array, or a plain list to the
    # Legate series must match what pandas gives for the same right operand.
    for rhs in (s2, s2.values, s2.to_list()):
        out_s = s1 + rhs
        out_ls = ls1 + rhs
        assert equals(out_ls, out_s)
result = a.equals(b) if not result: print(a) print(b) return result indices = [ pd.RangeIndex(1, 4), pd.RangeIndex(9, step=2), pd.RangeIndex(1, 20, step=3), ] for index in indices: sr = pd.Series(list(range(len(index))), index=index) lsr = lp.Series(sr) # Passing Legate series as arguments assert equals(lp.Series(lsr), pd.Series(sr)) assert equals( lp.Series(lsr, dtype="float32"), pd.Series(sr, dtype="float32"), ) assert equals(lp.Series(lsr, name="A"), pd.Series(sr, name="A")) # Passing Pandas series as arguments assert equals(lp.Series(sr), pd.Series(sr)) assert equals( lp.Series(sr, dtype="float32"), pd.Series(sr, dtype="float32"), )
# Passing Pandas dataframes as arguments: each keyword combination must give
# the same frame from the Legate constructor as from the pandas constructor.
for kwargs in (
    {"dtype": "float64"},
    {"columns": ["a"]},
    {"columns": ["a"], "dtype": "float64"},
):
    assert equals(lp.DataFrame(df, **kwargs), pd.DataFrame(df, **kwargs))

# Passing named Legate series as arguments (a dict keyed by column name)
sr_a = pd.Series([1, 2, 3], index=index)
sr_b = pd.Series([4, 5, 6], index=index)
lsr_a = lp.Series([1, 2, 3], index=index)
lsr_b = lp.Series([4, 5, 6], index=index)

legate_columns = {"a": lsr_a, "b": lsr_b}
pandas_columns = {"a": sr_a, "b": sr_b}
assert equals(lp.DataFrame(legate_columns), pd.DataFrame(pandas_columns))

# Passing Pandas series as arguments
column_a = df["a"]
assert equals(lp.DataFrame(column_a), pd.DataFrame(column_a))
# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import legate.pandas as lp
from tests.utils import must_fail


def _test(ex, sr, *args):
    """Hand ``must_fail`` a thunk that calls ``sr.astype(*args)``.

    ``ex`` is passed through to ``must_fail`` unchanged (presumably the
    exception type the conversion is expected to raise).
    """

    def _convert():
        sr.astype(*args)

    must_fail(ex, _convert)


# A categorical series of strings cast to "int".
categorical = lp.Series(["a"], dtype="category")
_test(ValueError, categorical, "int")

# An integer series cast to a datetime dtype.
integers = lp.Series([1])
_test(NotImplementedError, integers, "datetime64[ns]")