def test_datetime_series_binops_pandas(lhs_dtype, rhs_dtype):
    """Compare datetime comparison operators on ``Series`` against pandas.

    Two pandas datetime series are built from date ranges with 400h and
    401h frequencies, mirrored into ``Series`` objects and cast to
    ``lhs_dtype`` / ``rhs_dtype``. Each comparison operator applied to the
    cast series must agree with the same operator applied to the pandas
    originals.
    """
    expect_lhs = pd.Series(
        pd.date_range("20010101", "20020215", freq="400h", name="times")
    )
    expect_rhs = pd.Series(
        pd.date_range("20010101", "20020215", freq="401h", name="times")
    )

    got_lhs = Series(expect_lhs).astype(lhs_dtype)
    got_rhs = Series(expect_rhs).astype(rhs_dtype)

    # Casting back to nanoseconds must reproduce the pandas input before
    # any comparison result is checked.
    assert_eq(expect_lhs, got_lhs.astype("datetime64[ns]"))
    assert_eq(expect_rhs, got_rhs.astype("datetime64[ns]"))

    assert_eq(expect_lhs < expect_rhs, got_lhs < got_rhs)
    assert_eq(expect_lhs > expect_rhs, got_lhs > got_rhs)
    assert_eq(expect_lhs == expect_rhs, got_lhs == got_rhs)
    assert_eq(expect_lhs <= expect_rhs, got_lhs <= got_rhs)
    assert_eq(expect_lhs >= expect_rhs, got_lhs >= got_rhs)
def test_string_numeric_astype(dtype):
    """Check that casting a numeric or datetime ``Series`` to ``str`` matches pandas.

    Parameters
    ----------
    dtype : str
        Dtype name of the source data. Must start with ``bool``, ``int``,
        ``float`` or ``datetime64``.

    Raises
    ------
    ValueError
        If ``dtype`` does not belong to one of the supported families.
        Without this check an unmatched dtype would fail later with an
        unbound-local error on ``data``.
    """
    is_datetime = dtype.startswith("datetime64")

    if dtype.startswith("bool"):
        data = [1, 0, 1, 0, 1]
    elif dtype.startswith("int"):
        data = [1, 2, 3, 4, 5]
    elif dtype.startswith("float"):
        data = [1.0, 2.0, 3.0, 4.0, 5.0]
    elif is_datetime:
        data = [1000000000, 2000000000, 3000000000, 4000000000, 5000000000]
    else:
        raise ValueError(
            "unsupported dtype for this test: {!r}".format(dtype)
        )

    if is_datetime:
        # The pandas side is always built with nanosecond resolution and the
        # Series under test is derived from it.
        ps = pd.Series(data, dtype="datetime64[ns]")
        gs = Series.from_pandas(ps)
        # Pandas datetime64 --> str typecasting returns an arbitrary format
        # depending on the data, so a fixed format is used for the expected
        # value unless we choose to match that behavior.
        expect = ps.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        ps = pd.Series(data, dtype=dtype)
        gs = Series(data, dtype=dtype)
        expect = ps.astype("str")

    got = gs.astype("str")

    assert_eq(expect, got)
def test_string_astype(dtype):
    """Check that casting a string ``Series`` to ``dtype`` matches pandas.

    Parameters
    ----------
    dtype : str
        Target dtype name. Must start with ``int``, ``float``, ``bool`` or
        ``datetime64``, or be exactly ``"str"`` / ``"object"``.

    Raises
    ------
    ValueError
        If ``dtype`` is none of the supported targets. Without this check
        an unmatched dtype would fail later with an unbound-local error on
        ``data``.
    """
    if dtype.startswith("int"):
        data = ["1", "2", "3", "4", "5"]
    elif dtype.startswith("float"):
        data = ["1.0", "2.0", "3.0", "4.0", "5.0"]
    elif dtype.startswith("bool"):
        data = ["True", "False", "True", "False", "False"]
    elif dtype.startswith("datetime64"):
        data = [
            "2019-06-04T00:00:00Z",
            "2019-06-04T12:12:12Z",
            "2019-06-03T00:00:00Z",
            "2019-05-04T00:00:00Z",
            "2018-06-04T00:00:00Z",
        ]
    elif dtype in ("str", "object"):
        data = ["ab", "cd", "ef", "gh", "ij"]
    else:
        raise ValueError(
            "unsupported dtype for this test: {!r}".format(dtype)
        )

    ps = pd.Series(data)
    gs = Series(data)

    # Pandas str --> bool typecasting always returns True if there's a
    # string, so the expected booleans are computed by comparison instead.
    if dtype.startswith("bool"):
        expect = ps == "True"
    else:
        expect = ps.astype(dtype)

    got = gs.astype(dtype)

    assert_eq(expect, got)
def test_typecast_to_from_datetime(data, from_dtype, to_dtype):
    """Round-trip ``from_dtype`` -> ``to_dtype`` -> ``from_dtype`` and compare with NumPy.

    ``data`` is first cast to ``from_dtype``; the same two-step cast is then
    applied to the NumPy array and to a ``Series`` built from it, and the
    results must be equal.
    """
    source = data.astype(from_dtype)
    series = Series(source)

    expected = source.astype(to_dtype).astype(from_dtype)
    actual = series.astype(to_dtype).astype(from_dtype)

    np.testing.assert_equal(expected, np.array(actual))
def test_typecast_from_datetime_to_datetime(data, from_dtype, to_dtype):
    """Compare a ``from_dtype`` -> ``to_dtype`` column cast with NumPy.

    The cast is performed on the ``_column`` object of a ``Series`` rather
    than on the ``Series`` itself, and the result is read back with
    ``to_array()`` for the comparison.
    """
    source = data.astype(from_dtype)
    column = Series(source)._column

    expected = source.astype(to_dtype)
    actual = column.astype(to_dtype)

    np.testing.assert_equal(expected, actual.to_array())
def test_typecast_to_datetime(data, dtype):
    """Compare a cast from ``dtype`` to ``datetime64[ms]`` with NumPy.

    ``data`` is cast to ``dtype`` first; the resulting array and a
    ``Series`` built from it are then both cast to ``datetime64[ms]`` and
    must be equal.
    """
    source = data.astype(dtype)
    series = Series(source)

    expected = source.astype('datetime64[ms]')
    actual = series.astype('datetime64[ms]')

    np.testing.assert_equal(expected, np.array(actual))
def func(index):
    """Apply ``binop`` to mixed int32/float operands and compare with NumPy.

    ``index`` is accepted but not used in the body. ``binop`` is not defined
    here; it is resolved from a scope outside this function.
    """
    values = np.random.random(100) * 10
    series = Series(values)

    # Left operand is truncated to int32, right operand keeps the original
    # floating-point values.
    actual = binop(series.astype('int32'), series)
    expected = binop(values.astype('int32'), values)

    np.testing.assert_almost_equal(actual.to_array(), expected, decimal=5)
def test_string_empty_astype(dtype):
    """Check that casting an empty string ``Series`` to ``dtype`` matches pandas."""
    empty = []

    pandas_series = pd.Series(empty, dtype="str")
    series = Series(empty, dtype="str")

    expected = pandas_series.astype(dtype)
    actual = series.astype(dtype)

    assert_eq(expected, actual)
def test_typecast_from_datetime_to_int64_to_datetime(data, dtype):
    """Compare a cast through ``int64`` and on to ``dtype`` with NumPy.

    A copy of ``data`` is wrapped in a pandas series, which seeds both the
    NumPy array and the ``Series``. Both are cast to ``np.int64`` and then
    to ``dtype``, and the results must be equal.
    """
    pandas_series = pd.Series(data.copy())
    numpy_values = np.array(pandas_series)
    series = Series(pandas_series)

    expected = numpy_values.astype(np.int64).astype(dtype)
    actual = series.astype(np.int64).astype(dtype)

    np.testing.assert_equal(expected, np.array(actual))
def test_string_astype(dtype):
    """Check that casting a string ``Series`` to a numeric ``dtype`` matches pandas.

    Parameters
    ----------
    dtype : str
        Target dtype name. Must start with ``int`` or ``float``.

    Raises
    ------
    ValueError
        If ``dtype`` is neither an int nor a float dtype name. Without this
        check an unmatched dtype would fail later with an unbound-local
        error on ``data``.
    """
    if dtype.startswith('int'):
        data = ["1", "2", "3", "4", "5"]
    elif dtype.startswith('float'):
        data = ["1.0", "2.0", "3.0", "4.0", "5.0"]
    else:
        raise ValueError(
            "unsupported dtype for this test: {!r}".format(dtype)
        )

    ps = pd.Series(data)
    gs = Series(data)

    expect = ps.astype(dtype)
    got = gs.astype(dtype)

    assert_eq(expect, got)
def test_string_empty_numeric_astype(dtype):
    """Check that casting an empty ``Series`` of ``dtype`` to ``str`` matches pandas.

    For any ``datetime64`` dtype the pandas side is built as
    ``datetime64[ns]``; the ``Series`` under test always uses ``dtype``
    as given.
    """
    empty = []

    pandas_dtype = "datetime64[ns]" if dtype.startswith("datetime64") else dtype
    pandas_series = pd.Series(empty, dtype=pandas_dtype)
    series = Series(empty, dtype=dtype)

    expected = pandas_series.astype("str")
    actual = series.astype("str")

    assert_eq(expected, actual)
def test_string_empty_numeric_astype(dtype):
    """Compare ``astype('str')`` on an empty ``Series`` of ``dtype`` with pandas.

    The pandas reference is created with ``datetime64[ns]`` whenever
    ``dtype`` names a ``datetime64`` type, and with ``dtype`` itself
    otherwise.
    """
    no_rows = []

    reference_dtype = dtype
    if dtype.startswith('datetime64'):
        reference_dtype = 'datetime64[ns]'

    reference = pd.Series(no_rows, dtype=reference_dtype)
    under_test = Series(no_rows, dtype=dtype)

    assert_eq(reference.astype('str'), under_test.astype('str'))
def test_date_minmax():
    """Check ``min`` and ``max`` of a ``datetime64[ms]`` ``Series`` against NumPy.

    The input is 1000 samples drawn from ``np.random.normal``, cast to
    ``datetime64[ms]`` on both the NumPy side and the ``Series`` side.
    """
    samples = np.random.normal(size=10 ** 3)
    series = Series(samples)

    expected = samples.astype("datetime64[ms]")
    actual = series.astype("datetime64[ms]")

    expected_min = expected.min()
    actual_min = actual.min()
    assert expected_min == actual_min

    expected_max = expected.max()
    actual_max = actual.max()
    assert expected_max == actual_max
def test_string_empty_numeric_astype(dtype):
    """Compare ``astype('str')`` on an empty ``Series`` of ``dtype`` with pandas.

    Boolean and float dtypes are marked as expected failures before any
    data is built.
    """
    unsupported = (
        ('bool', "booleans not yet supported"),
        ('float', "floats not yet supported"),
    )
    for prefix, reason in unsupported:
        if dtype.startswith(prefix):
            # pytest.xfail raises, so nothing below runs for these dtypes.
            pytest.xfail(reason)

    empty = []
    pandas_series = pd.Series(empty, dtype=dtype)
    series = Series(empty, dtype=dtype)

    expected = pandas_series.astype('str')
    actual = series.astype('str')

    assert_eq(expected, actual)
def test_string_numeric_astype(dtype):
    """Check that casting an integer ``Series`` to ``str`` matches pandas.

    Boolean and float dtypes are marked as expected failures.

    Parameters
    ----------
    dtype : str
        Dtype name of the source data. Must start with ``bool``, ``int`` or
        ``float``.

    Raises
    ------
    ValueError
        If ``dtype`` belongs to none of those families. Without this check
        an unmatched dtype would fail later with an unbound-local error on
        ``data``.
    """
    # pytest.xfail() raises immediately, so any statement placed after it in
    # the same branch can never run; the guards therefore come first and no
    # sample data is defined for the unsupported families.
    if dtype.startswith('bool'):
        pytest.xfail("booleans not yet supported")
    if dtype.startswith('float'):
        pytest.xfail("floats not yet supported")
    if not dtype.startswith('int'):
        raise ValueError(
            "unsupported dtype for this test: {!r}".format(dtype)
        )

    data = [1, 2, 3, 4, 5]

    ps = pd.Series(data, dtype=dtype)
    gs = Series(data, dtype=dtype)

    expect = ps.astype('str')
    got = gs.astype('str')

    assert_eq(expect, got)
def test_string_astype(dtype):
    """Check that casting a string ``Series`` to ``dtype`` matches pandas.

    Parameters
    ----------
    dtype : str
        Target dtype name. Must start with ``int``, ``float``, ``bool`` or
        ``datetime64``.

    Raises
    ------
    ValueError
        If ``dtype`` belongs to none of those families. Without this check
        an unmatched dtype would fail later with an unbound-local error on
        ``data``.
    """
    if dtype.startswith('int'):
        data = ["1", "2", "3", "4", "5"]
    elif dtype.startswith('float'):
        data = ["1.0", "2.0", "3.0", "4.0", "5.0"]
    elif dtype.startswith('bool'):
        data = ["True", "False", "True", "False", "False"]
    elif dtype.startswith('datetime64'):
        data = [
            "2019-06-04T00:00:00Z",
            "2019-06-04T12:12:12Z",
            "2019-06-03T00:00:00Z",
            "2019-05-04T00:00:00Z",
            "2018-06-04T00:00:00Z",
        ]
    else:
        raise ValueError(
            "unsupported dtype for this test: {!r}".format(dtype)
        )

    ps = pd.Series(data)
    gs = Series(data)

    # Pandas str --> bool typecasting always returns True if there's a
    # string, so the expected booleans are computed by comparison instead.
    if dtype.startswith('bool'):
        expect = (ps == 'True')
    else:
        expect = ps.astype(dtype)

    got = gs.astype(dtype)

    assert_eq(expect, got)