def test_series_frame_radd_bug(self):
        # GH#353
        vals = pd.Series(tm.rands_array(5, 10))
        result = "foo_" + vals
        expected = vals.map(lambda x: "foo_" + x)
        tm.assert_series_equal(result, expected)

        frame = pd.DataFrame({"vals": vals})
        result = "foo_" + frame
        expected = pd.DataFrame({"vals": vals.map(lambda x: "foo_" + x)})
        tm.assert_frame_equal(result, expected)

        ts = tm.makeTimeSeries()
        ts.name = "ts"

        # really raise this time
        now = pd.Timestamp.now().to_pydatetime()
        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            now + ts

        with pytest.raises(TypeError, match=msg):
            ts + now
Example #2
    def test_concat_series(self):

        ts = tm.makeTimeSeries()
        ts.name = "foo"

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        assert result.name == ts.name

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))

        exp_codes = [
            np.repeat([0, 1, 2], [len(x) for x in pieces]),
            np.arange(len(ts))
        ]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)
Example #3
def test_series_describe_single():
    ts = tm.makeTimeSeries()
    grouped = ts.groupby(lambda x: x.month)
    result = grouped.apply(lambda x: x.describe())
    expected = grouped.describe().stack()
    tm.assert_series_equal(result, expected)
Example #4
    def setup_method(self):
        self.ts = tm.makeTimeSeries()  # Was at top level in test_series
        self.ts.name = "ts"

        self.series = tm.makeStringSeries()
        self.series.name = "series"
Example #5
    def test_reindex(self, float_frame):
        datetime_series = tm.makeTimeSeries(nper=30)

        newFrame = float_frame.reindex(datetime_series.index)

        for col in newFrame.columns:
            for idx, val in newFrame[col].items():
                if idx in float_frame.index:
                    if np.isnan(val):
                        assert np.isnan(float_frame[col][idx])
                    else:
                        assert val == float_frame[col][idx]
                else:
                    assert np.isnan(val)

        for col, series in newFrame.items():
            assert tm.equalContents(series.index, newFrame.index)
        emptyFrame = float_frame.reindex(Index([]))
        assert len(emptyFrame.index) == 0

        # Cython code should be unit-tested directly
        nonContigFrame = float_frame.reindex(datetime_series.index[::2])

        for col in nonContigFrame.columns:
            for idx, val in nonContigFrame[col].items():
                if idx in float_frame.index:
                    if np.isnan(val):
                        assert np.isnan(float_frame[col][idx])
                    else:
                        assert val == float_frame[col][idx]
                else:
                    assert np.isnan(val)

        for col, series in nonContigFrame.items():
            assert tm.equalContents(series.index, nonContigFrame.index)

        # corner cases

        # Same index, copies values but not index if copy=False
        newFrame = float_frame.reindex(float_frame.index, copy=False)
        assert newFrame.index is float_frame.index

        # length zero
        newFrame = float_frame.reindex([])
        assert newFrame.empty
        assert len(newFrame.columns) == len(float_frame.columns)

        # length zero with columns reindexed with non-empty index
        newFrame = float_frame.reindex([])
        newFrame = newFrame.reindex(float_frame.index)
        assert len(newFrame.index) == len(float_frame.index)
        assert len(newFrame.columns) == len(float_frame.columns)

        # pass non-Index
        newFrame = float_frame.reindex(list(datetime_series.index))
        expected = datetime_series.index._with_freq(None)
        tm.assert_index_equal(newFrame.index, expected)

        # copy with no axes
        result = float_frame.reindex()
        tm.assert_frame_equal(result, float_frame)
        assert result is not float_frame
Example #6
def test_pickle_preserve_name(name):

    unpickled = tm.round_trip_pickle(tm.makeTimeSeries(name=name))
    assert unpickled.name == name
Example #7
File: conftest.py  Project: zach-b/pandas
def ts():
    return tm.makeTimeSeries()
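As a quick orientation, the sketch below approximates what tm.makeTimeSeries() is assumed to return: a float64 Series of random values over a business-day DatetimeIndex, roughly 30 periods by default. It is an illustration of the helper's shape, not the pandas._testing implementation; the function name and start date here are made up for the example.

import numpy as np
import pandas as pd


def make_time_series_sketch(nper=30, name=None):
    # Stand-in for tm.makeTimeSeries(): random values on a business-day index.
    index = pd.date_range("2000-01-03", periods=nper, freq="B")
    return pd.Series(np.random.default_rng(0).standard_normal(nper),
                     index=index, name=name)


ts = make_time_series_sketch(name="ts")
print(ts.head())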
Example #8
    def test_apply_scaler_on_date_time_index_aware_series(self):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        result = Series(series.index).apply(lambda x: 1)
        tm.assert_series_equal(result, Series(np.ones(30), dtype="int64"))
Example #9
@pytest.mark.parametrize(
    "vals,mapping,exp",
    [
        (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
        (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
    ],
)
def test_map_missing_mixed(vals, mapping, exp):
    # GH20495
    s = Series(vals + [np.nan])
    result = s.map(mapping)

    tm.assert_series_equal(result, Series(exp))


@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            tm.makeTimeSeries(nper=30),
            DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    if aware:
        index = dti.tz_localize("UTC").index
    else:
        index = dti.index
    result = Series(index).apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(result, exp)
Example #10
def test_flush(setup_path):

    with ensure_clean_store(setup_path) as store:
        store["a"] = tm.makeTimeSeries()
        store.flush()
        store.flush(fsync=True)
Example #11
    def test_series_operators_compare(self, comparison_op, func):
        op = comparison_op
        series = tm.makeTimeSeries().rename("ts")
        other = func(series)
        compare_op(series, other, op)
Example #12
    def test_series_operators_arithmetic(self, all_arithmetic_functions, func):
        op = all_arithmetic_functions
        series = tm.makeTimeSeries().rename("ts")
        other = func(series)
        compare_op(series, other, op)
Example #13
def ts():
    return tm.makeTimeSeries(name="ts")
Example #14
def test_store_datetime_mixed(setup_path):

    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
    ts = tm.makeTimeSeries()
    df["d"] = ts.index[:3]
    _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)
Example #15
    def test_pickle_preserve_name(self):
        for n in [777, 777.0, "name", datetime(2001, 11, 11), (1, 2)]:
            unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
            assert unpickled.name == n
Example #16
class TestSeriesMap:
    def test_map(self, datetime_series):
        index, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"],
                           categories=["B", "C", "D", "E"]))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)

    def test_map_empty(self, index):
        if isinstance(index, MultiIndex):
            pytest.skip(
                "Initializing a Series from a MultiIndex is not supported")

        s = Series(index)
        result = s.map({})

        expected = Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: "foo", False: "bar"})
        expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_int(self):
        left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
        right = Series({1: 11, 2: 22, 3: 33})

        assert left.dtype == np.float_
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float_
        assert isna(merged["d"])
        assert not isna(merged["c"])

    def test_map_type_inference(self):
        s = Series(range(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        from decimal import Decimal

        result = string_series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        s = Series([1.5, np.nan, 3, np.nan, 5])

        result = s.map(lambda x: x * 2, na_action="ignore")
        exp = s * 2
        tm.assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = DataFrame({"a": [(1, ), (2, ), (3, 4), (5, 6)]})
        label_mappings = {(1, ): "A", (2, ): "B", (3, 4): "A", (5, 6): "B"}

        df["labels"] = df["a"].map(label_mappings)
        df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df["labels"],
                               df["expected_labels"],
                               check_names=False)

    def test_map_counter(self):
        s = Series(["a", "b", "c"], index=[1, 2, 3])
        counter = Counter()
        counter["b"] = 5
        counter["c"] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        s = Series([1, 2, 3], index=["a", "b", "c"])
        default_dict = defaultdict(lambda: "blank")
        default_dict[1] = "stuff"
        result = s.map(default_dict)
        expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_na_key(self):
        # https://github.com/pandas-dev/pandas/issues/17648
        # Checks that np.nan key is appropriately mapped
        s = Series([1, 2, np.nan])
        expected = Series(["a", "b", "c"])
        result = s.map({1: "a", 2: "b", np.nan: "c"})
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series(["missing", "missing", "three"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        class DictWithoutMissing(dict):
            pass

        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_abc_mapping(self, non_dict_mapping_subclass):
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        s = Series([1, 2, 3])
        not_a_dictionary = non_dict_mapping_subclass({3: "three"})
        result = s.map(not_a_dictionary)
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_abc_mapping_with_missing(self, non_dict_mapping_subclass):
        # https://github.com/pandas-dev/pandas/issues/29733
        # Check collections.abc.Mapping support as mapper for Series.map
        class NonDictMappingWithMissing(non_dict_mapping_subclass):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        not_a_dictionary = NonDictMappingWithMissing({3: "three"})
        result = s.map(not_a_dictionary)
        # __missing__ is a dict concept, not a Mapping concept,
        # so it should not change the result!
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_box(self):
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
        exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.days}")
        exp = Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [
            pd.Period("2011-01-01", freq="M"),
            pd.Period("2011-01-02", freq="M")
        ]
        s = Series(vals)
        assert s.dtype == "Period[M]"
        res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
        exp = Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_map_categorical(self):
        values = pd.Categorical(list("ABBABCD"),
                                categories=list("DCBA"),
                                ordered=True)
        s = Series(values, name="XX", index=list("abcdefg"))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list("abbabcd"),
                                    categories=list("dcba"),
                                    ordered=True)
        exp = Series(exp_values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: "A")
        exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        assert result.dtype == object

        with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
            s.map(lambda x: x, na_action="ignore")

    def test_map_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02",
                               freq="H").tz_localize("Asia/Tokyo")
        s = Series(values, name="XX")

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03",
                                   freq="H").tz_localize("Asia/Tokyo")
        exp = Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
            s.map(lambda x: x, na_action="ignore")

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list("abc"), {
                np.nan: "not NaN"
            }, [np.nan] * 3 + ["not NaN"]),
            (list("abc"), {
                "a": "a letter"
            }, ["a letter"] + [np.nan] * 3),
            (list(range(3)), {
                0: 42
            }, [42] + [np.nan] * 3),
        ],
    )
    def test_map_missing_mixed(self, vals, mapping, exp):
        # GH20495
        s = Series(vals + [np.nan])
        result = s.map(mapping)

        tm.assert_series_equal(result, Series(exp))

    @pytest.mark.parametrize(
        "dti,exp",
        [
            (
                Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
                DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
            ),
            (
                tm.makeTimeSeries(nper=30),
                DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
            ),
        ],
    )
    @pytest.mark.parametrize("aware", [True, False])
    def test_apply_series_on_date_time_index_aware_series(
            self, dti, exp, aware):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        if aware:
            index = dti.tz_localize("UTC").index
        else:
            index = dti.index
        result = Series(index).apply(lambda x: Series([1, 2]))
        tm.assert_frame_equal(result, exp)

    def test_apply_scaler_on_date_time_index_aware_series(self):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        result = Series(series.index).apply(lambda x: 1)
        tm.assert_series_equal(result, Series(np.ones(30), dtype="int64"))

    def test_map_float_to_string_precision(self):
        # GH 13228
        ser = Series(1 / 3)
        result = ser.map(lambda val: str(val)).to_dict()
        expected = {0: "0.3333333333333333"}
        assert result == expected

    def test_map_with_invalid_na_action_raises(self):
        # https://github.com/pandas-dev/pandas/issues/32815
        s = Series([1, 2, 3])
        msg = "na_action must either be 'ignore' or None"
        with pytest.raises(ValueError, match=msg):
            s.map(lambda x: x, na_action="____")

    def test_apply_to_timedelta(self):
        list_of_valid_strings = ["00:00:01", "00:00:02"]
        a = pd.to_timedelta(list_of_valid_strings)
        b = Series(list_of_valid_strings).apply(pd.to_timedelta)
        # FIXME: dont leave commented-out
        # Can't compare until apply on a Series gives the correct dtype
        # assert_series_equal(a, b)

        list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]

        a = pd.to_timedelta(list_of_strings)  # noqa
        b = Series(list_of_strings).apply(pd.to_timedelta)  # noqa
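As an aside to the __missing__ tests above (test_map_dict_subclass_with_missing, test_map_dict_subclass_without_missing, and the abc.Mapping variants), here is a minimal standalone sketch of the behavior they assert: Series.map only honors __missing__ on genuine dict subclasses, while any other Mapping leaves unmatched keys as NaN. The class names below are illustrative, not taken from the test file.

from collections.abc import Mapping

import pandas as pd


class DefaultingDict(dict):
    def __missing__(self, key):
        # Used by Series.map because this is a real dict subclass.
        return "missing"


class ReadOnlyMapping(Mapping):
    # A Mapping that is not a dict subclass; __missing__ would be ignored here.
    def __init__(self, data):
        self._data = dict(data)

    def __getitem__(self, key):
        return self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)


s = pd.Series([1, 2, 3])
print(s.map(DefaultingDict({3: "three"})))   # missing, missing, three
print(s.map(ReadOnlyMapping({3: "three"})))  # NaN, NaN, three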
Example #17
    def test_lag_plot(self, kwargs):
        from pandas.plotting import lag_plot

        ser = tm.makeTimeSeries(name="ts")
        _check_plot_works(lag_plot, series=ser, **kwargs)
Example #18
        Series(["a", "b", "c"]),
        Series(["a", np.nan, "c"]),
        Series(["a", None, "c"]),
        Series([True, False, True]),
        Series(dtype=object),
        Index([1, 2, 3]),
        Index([True, False, True]),
        DataFrame({
            "x": ["a", "b", "c"],
            "y": [1, 2, 3]
        }),
        DataFrame(),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
        MultiIndex.from_product([
            range(5), ["foo", "bar", "baz"],
            pd.date_range("20130101", periods=2)
        ]),
        MultiIndex.from_product([pd.CategoricalIndex(list("aabc")),
                                 range(3)]),
    ],
)
def test_hash_pandas_object(obj, index):
    _check_equal(obj, index=index)
    _check_not_equal_with_index(obj)
Example #19
    def test_bootstrap_plot(self):
        from pandas.plotting import bootstrap_plot

        ser = tm.makeTimeSeries(name="ts")
        _check_plot_works(bootstrap_plot, series=ser, size=10)