def test_series_frame_radd_bug(self): # GH#353 vals = pd.Series(tm.rands_array(5, 10)) result = "foo_" + vals expected = vals.map(lambda x: "foo_" + x) tm.assert_series_equal(result, expected) frame = pd.DataFrame({"vals": vals}) result = "foo_" + frame expected = pd.DataFrame({"vals": vals.map(lambda x: "foo_" + x)}) tm.assert_frame_equal(result, expected) ts = tm.makeTimeSeries() ts.name = "ts" # really raise this time now = pd.Timestamp.now().to_pydatetime() msg = "unsupported operand type" with pytest.raises(TypeError, match=msg): now + ts with pytest.raises(TypeError, match=msg): ts + now
def test_concat_series(self): ts = tm.makeTimeSeries() ts.name = "foo" pieces = [ts[:5], ts[5:15], ts[15:]] result = concat(pieces) tm.assert_series_equal(result, ts) assert result.name == ts.name result = concat(pieces, keys=[0, 1, 2]) expected = ts.copy() ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]")) exp_codes = [ np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts)) ] exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes) expected.index = exp_index tm.assert_series_equal(result, expected)
def test_series_describe_single(): ts = tm.makeTimeSeries() grouped = ts.groupby(lambda x: x.month) result = grouped.apply(lambda x: x.describe()) expected = grouped.describe().stack() tm.assert_series_equal(result, expected)
def setup_method(self): self.ts = tm.makeTimeSeries() # Was at top level in test_series self.ts.name = "ts" self.series = tm.makeStringSeries() self.series.name = "series"
def test_reindex(self, float_frame): datetime_series = tm.makeTimeSeries(nper=30) newFrame = float_frame.reindex(datetime_series.index) for col in newFrame.columns: for idx, val in newFrame[col].items(): if idx in float_frame.index: if np.isnan(val): assert np.isnan(float_frame[col][idx]) else: assert val == float_frame[col][idx] else: assert np.isnan(val) for col, series in newFrame.items(): assert tm.equalContents(series.index, newFrame.index) emptyFrame = float_frame.reindex(Index([])) assert len(emptyFrame.index) == 0 # Cython code should be unit-tested directly nonContigFrame = float_frame.reindex(datetime_series.index[::2]) for col in nonContigFrame.columns: for idx, val in nonContigFrame[col].items(): if idx in float_frame.index: if np.isnan(val): assert np.isnan(float_frame[col][idx]) else: assert val == float_frame[col][idx] else: assert np.isnan(val) for col, series in nonContigFrame.items(): assert tm.equalContents(series.index, nonContigFrame.index) # corner cases # Same index, copies values but not index if copy=False newFrame = float_frame.reindex(float_frame.index, copy=False) assert newFrame.index is float_frame.index # length zero newFrame = float_frame.reindex([]) assert newFrame.empty assert len(newFrame.columns) == len(float_frame.columns) # length zero with columns reindexed with non-empty index newFrame = float_frame.reindex([]) newFrame = newFrame.reindex(float_frame.index) assert len(newFrame.index) == len(float_frame.index) assert len(newFrame.columns) == len(float_frame.columns) # pass non-Index newFrame = float_frame.reindex(list(datetime_series.index)) expected = datetime_series.index._with_freq(None) tm.assert_index_equal(newFrame.index, expected) # copy with no axes result = float_frame.reindex() tm.assert_frame_equal(result, float_frame) assert result is not float_frame
def test_pickle_preserve_name(name): unpickled = tm.round_trip_pickle(tm.makeTimeSeries(name=name)) assert unpickled.name == name
def ts(): return tm.makeTimeSeries()
def test_apply_scaler_on_date_time_index_aware_series(self): # GH 25959 # Calling apply on a localized time series should not cause an error series = tm.makeTimeSeries(nper=30).tz_localize("UTC") result = Series(series.index).apply(lambda x: 1) tm.assert_series_equal(result, Series(np.ones(30), dtype="int64"))
# GH20495 s = Series(vals + [np.nan]) result = s.map(mapping) tm.assert_series_equal(result, Series(exp)) @pytest.mark.parametrize( "dti,exp", [ ( Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), ), ( tm.makeTimeSeries(nper=30), DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"), ), ], ) @pytest.mark.parametrize("aware", [True, False]) def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): # GH 25959 # Calling apply on a localized time series should not cause an error if aware: index = dti.tz_localize("UTC").index else: index = dti.index result = Series(index).apply(lambda x: Series([1, 2])) tm.assert_frame_equal(result, exp)
def test_flush(setup_path): with ensure_clean_store(setup_path) as store: store["a"] = tm.makeTimeSeries() store.flush() store.flush(fsync=True)
def test_series_operators_compare(self, comparison_op, func): op = comparison_op series = tm.makeTimeSeries().rename("ts") other = func(series) compare_op(series, other, op)
def test_series_operators_arithmetic(self, all_arithmetic_functions, func): op = all_arithmetic_functions series = tm.makeTimeSeries().rename("ts") other = func(series) compare_op(series, other, op)
def ts(): return tm.makeTimeSeries(name="ts")
def test_store_datetime_mixed(setup_path): df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) ts = tm.makeTimeSeries() df["d"] = ts.index[:3] _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)
def test_pickle_preserve_name(self): for n in [777, 777.0, "name", datetime(2001, 11, 11), (1, 2)]: unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n)) assert unpickled.name == n
class TestSeriesMap: def test_map(self, datetime_series): index, data = tm.getMixedTypeDict() source = Series(data["B"], index=data["C"]) target = Series(data["C"][:4], index=data["D"][:4]) merged = target.map(source) for k, v in merged.items(): assert v == source[target[k]] # input could be a dict merged = target.map(source.to_dict()) for k, v in merged.items(): assert v == source[target[k]] # function result = datetime_series.map(lambda x: x * 2) tm.assert_series_equal(result, datetime_series * 2) # GH 10324 a = Series([1, 2, 3, 4]) b = Series(["even", "odd", "even", "odd"], dtype="category") c = Series(["even", "odd", "even", "odd"]) exp = Series(["odd", "even", "odd", np.nan], dtype="category") tm.assert_series_equal(a.map(b), exp) exp = Series(["odd", "even", "odd", np.nan]) tm.assert_series_equal(a.map(c), exp) a = Series(["a", "b", "c", "d"]) b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])) c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"])) exp = Series([np.nan, 1, 2, 3]) tm.assert_series_equal(a.map(b), exp) exp = Series([np.nan, 1, 2, 3]) tm.assert_series_equal(a.map(c), exp) a = Series(["a", "b", "c", "d"]) b = Series( ["B", "C", "D", "E"], dtype="category", index=pd.CategoricalIndex(["b", "c", "d", "e"]), ) c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])) exp = Series( pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])) tm.assert_series_equal(a.map(b), exp) exp = Series([np.nan, "B", "C", "D"]) tm.assert_series_equal(a.map(c), exp) def test_map_empty(self, index): if isinstance(index, MultiIndex): pytest.skip( "Initializing a Series from a MultiIndex is not supported") s = Series(index) result = s.map({}) expected = Series(np.nan, index=s.index) tm.assert_series_equal(result, expected) def test_map_compat(self): # related GH 8024 s = Series([True, True, False], index=[1, 2, 3]) result = s.map({True: "foo", False: "bar"}) expected = Series(["foo", "foo", "bar"], index=[1, 2, 3]) tm.assert_series_equal(result, expected) def test_map_int(self): left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4}) right = Series({1: 11, 2: 22, 3: 33}) assert left.dtype == np.float_ assert issubclass(right.dtype.type, np.integer) merged = left.map(right) assert merged.dtype == np.float_ assert isna(merged["d"]) assert not isna(merged["c"]) def test_map_type_inference(self): s = Series(range(3)) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) assert issubclass(s2.dtype.type, np.integer) def test_map_decimal(self, string_series): from decimal import Decimal result = string_series.map(lambda x: Decimal(str(x))) assert result.dtype == np.object_ assert isinstance(result[0], Decimal) def test_map_na_exclusion(self): s = Series([1.5, np.nan, 3, np.nan, 5]) result = s.map(lambda x: x * 2, na_action="ignore") exp = s * 2 tm.assert_series_equal(result, exp) def test_map_dict_with_tuple_keys(self): """ Due to new MultiIndex-ing behaviour in v0.14.0, dicts with tuple keys passed to map were being converted to a multi-index, preventing tuple values from being mapped properly. """ # GH 18496 df = DataFrame({"a": [(1, ), (2, ), (3, 4), (5, 6)]}) label_mappings = {(1, ): "A", (2, ): "B", (3, 4): "A", (5, 6): "B"} df["labels"] = df["a"].map(label_mappings) df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) # All labels should be filled now tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) def test_map_counter(self): s = Series(["a", "b", "c"], index=[1, 2, 3]) counter = Counter() counter["b"] = 5 counter["c"] += 1 result = s.map(counter) expected = Series([0, 5, 1], index=[1, 2, 3]) tm.assert_series_equal(result, expected) def test_map_defaultdict(self): s = Series([1, 2, 3], index=["a", "b", "c"]) default_dict = defaultdict(lambda: "blank") default_dict[1] = "stuff" result = s.map(default_dict) expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) def test_map_dict_na_key(self): # https://github.com/pandas-dev/pandas/issues/17648 # Checks that np.nan key is appropriately mapped s = Series([1, 2, np.nan]) expected = Series(["a", "b", "c"]) result = s.map({1: "a", 2: "b", np.nan: "c"}) tm.assert_series_equal(result, expected) def test_map_dict_subclass_with_missing(self): """ Test Series.map with a dictionary subclass that defines __missing__, i.e. sets a default value (GH #15999). """ class DictWithMissing(dict): def __missing__(self, key): return "missing" s = Series([1, 2, 3]) dictionary = DictWithMissing({3: "three"}) result = s.map(dictionary) expected = Series(["missing", "missing", "three"]) tm.assert_series_equal(result, expected) def test_map_dict_subclass_without_missing(self): class DictWithoutMissing(dict): pass s = Series([1, 2, 3]) dictionary = DictWithoutMissing({3: "three"}) result = s.map(dictionary) expected = Series([np.nan, np.nan, "three"]) tm.assert_series_equal(result, expected) def test_map_abc_mapping(self, non_dict_mapping_subclass): # https://github.com/pandas-dev/pandas/issues/29733 # Check collections.abc.Mapping support as mapper for Series.map s = Series([1, 2, 3]) not_a_dictionary = non_dict_mapping_subclass({3: "three"}) result = s.map(not_a_dictionary) expected = Series([np.nan, np.nan, "three"]) tm.assert_series_equal(result, expected) def test_map_abc_mapping_with_missing(self, non_dict_mapping_subclass): # https://github.com/pandas-dev/pandas/issues/29733 # Check collections.abc.Mapping support as mapper for Series.map class NonDictMappingWithMissing(non_dict_mapping_subclass): def __missing__(self, key): return "missing" s = Series([1, 2, 3]) not_a_dictionary = NonDictMappingWithMissing({3: "three"}) result = s.map(not_a_dictionary) # __missing__ is a dict concept, not a Mapping concept, # so it should not change the result! expected = Series([np.nan, np.nan, "three"]) tm.assert_series_equal(result, expected) def test_map_box(self): vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] s = Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) vals = [ pd.Timestamp("2011-01-01", tz="US/Eastern"), pd.Timestamp("2011-01-02", tz="US/Eastern"), ] s = Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) # timedelta vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = Series(vals) assert s.dtype == "timedelta64[ns]" res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") exp = Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) # period vals = [ pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M") ] s = Series(vals) assert s.dtype == "Period[M]" res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") exp = Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) def test_map_categorical(self): values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) s = Series(values, name="XX", index=list("abcdefg")) result = s.map(lambda x: x.lower()) exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) exp = Series(exp_values, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) tm.assert_categorical_equal(result.values, exp_values) result = s.map(lambda x: "A") exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) assert result.dtype == object with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): s.map(lambda x: x, na_action="ignore") def test_map_datetimetz(self): values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo") s = Series(values, name="XX") # keep tz result = s.map(lambda x: x + pd.offsets.Day()) exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize("Asia/Tokyo") exp = Series(exp_values, name="XX") tm.assert_series_equal(result, exp) # change dtype # GH 14506 : Returned dtype changed from int32 to int64 result = s.map(lambda x: x.hour) exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) tm.assert_series_equal(result, exp) with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): s.map(lambda x: x, na_action="ignore") # not vectorized def f(x): if not isinstance(x, pd.Timestamp): raise ValueError return str(x.tz) result = s.map(f) exp = Series(["Asia/Tokyo"] * 25, name="XX") tm.assert_series_equal(result, exp) @pytest.mark.parametrize( "vals,mapping,exp", [ (list("abc"), { np.nan: "not NaN" }, [np.nan] * 3 + ["not NaN"]), (list("abc"), { "a": "a letter" }, ["a letter"] + [np.nan] * 3), (list(range(3)), { 0: 42 }, [42] + [np.nan] * 3), ], ) def test_map_missing_mixed(self, vals, mapping, exp): # GH20495 s = Series(vals + [np.nan]) result = s.map(mapping) tm.assert_series_equal(result, Series(exp)) @pytest.mark.parametrize( "dti,exp", [ ( Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), ), ( tm.makeTimeSeries(nper=30), DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"), ), ], ) @pytest.mark.parametrize("aware", [True, False]) def test_apply_series_on_date_time_index_aware_series( self, dti, exp, aware): # GH 25959 # Calling apply on a localized time series should not cause an error if aware: index = dti.tz_localize("UTC").index else: index = dti.index result = Series(index).apply(lambda x: Series([1, 2])) tm.assert_frame_equal(result, exp) def test_apply_scaler_on_date_time_index_aware_series(self): # GH 25959 # Calling apply on a localized time series should not cause an error series = tm.makeTimeSeries(nper=30).tz_localize("UTC") result = Series(series.index).apply(lambda x: 1) tm.assert_series_equal(result, Series(np.ones(30), dtype="int64")) def test_map_float_to_string_precision(self): # GH 13228 ser = Series(1 / 3) result = ser.map(lambda val: str(val)).to_dict() expected = {0: "0.3333333333333333"} assert result == expected def test_map_with_invalid_na_action_raises(self): # https://github.com/pandas-dev/pandas/issues/32815 s = Series([1, 2, 3]) msg = "na_action must either be 'ignore' or None" with pytest.raises(ValueError, match=msg): s.map(lambda x: x, na_action="____") def test_apply_to_timedelta(self): list_of_valid_strings = ["00:00:01", "00:00:02"] a = pd.to_timedelta(list_of_valid_strings) b = Series(list_of_valid_strings).apply(pd.to_timedelta) # FIXME: dont leave commented-out # Can't compare until apply on a Series gives the correct dtype # assert_series_equal(a, b) list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] a = pd.to_timedelta(list_of_strings) # noqa b = Series(list_of_strings).apply(pd.to_timedelta) # noqa
def test_lag_plot(self, kwargs): from pandas.plotting import lag_plot ser = tm.makeTimeSeries(name="ts") _check_plot_works(lag_plot, series=ser, **kwargs)
Series(["a", "b", "c"]), Series(["a", np.nan, "c"]), Series(["a", None, "c"]), Series([True, False, True]), Series(dtype=object), Index([1, 2, 3]), Index([True, False, True]), DataFrame({ "x": ["a", "b", "c"], "y": [1, 2, 3] }), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), MultiIndex.from_product([ range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2) ]), MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]), ], ) def test_hash_pandas_object(obj, index): _check_equal(obj, index=index) _check_not_equal_with_index(obj)
def test_bootstrap_plot(self): from pandas.plotting import bootstrap_plot ser = tm.makeTimeSeries(name="ts") _check_plot_works(bootstrap_plot, series=ser, size=10)