示例#1
0
    def test_frame_to_time_stamp(self):
        """Check ``DataFrame.to_timestamp`` along rows, then along columns.

        The frame is labelled by an annual PeriodIndex; the same set of
        conversions is verified for the index and, after transposing, for
        the columns.
        """
        n_cols = 5
        periods = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
        frame = DataFrame(randn(len(periods), n_cols), index=periods)
        frame['mix'] = 'a'

        one_ns = Timedelta(1, 'ns')

        def _get_with_delta(delta, freq='A-DEC'):
            # Year-end range whose two endpoints are both moved by ``delta``.
            lo = to_datetime('1/1/2001') + delta
            hi = to_datetime('12/31/2009') + delta
            return date_range(lo, hi, freq=freq)

        # (target freq, delta applied to the year-end dates,
        #  unit of the final one-step increment)
        sub_daily = [
            ('H', timedelta(hours=23), 'h'),
            ('T', timedelta(hours=23, minutes=59), 'm'),
            ('S', timedelta(hours=23, minutes=59, seconds=59), 's'),
        ]

        def _check_axis(obj, labels_attr, **axis_kw):
            # ``labels_attr`` names the axis labels to inspect on the result
            # ('index' or 'columns'); ``axis_kw`` is forwarded verbatim.
            wanted = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
            wanted = wanted + Timedelta(1, 'D') - one_ns
            converted = obj.to_timestamp('D', 'end', **axis_kw)
            tm.assert_index_equal(getattr(converted, labels_attr), wanted)
            tm.assert_numpy_array_equal(converted.values, obj.values)

            wanted = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
            converted = obj.to_timestamp('D', 'start', **axis_kw)
            tm.assert_index_equal(getattr(converted, labels_attr), wanted)

            for freq, delta, unit in sub_daily:
                converted = obj.to_timestamp(freq, 'end', **axis_kw)
                wanted = _get_with_delta(delta)
                wanted = wanted + Timedelta(1, unit) - one_ns
                tm.assert_index_equal(getattr(converted, labels_attr), wanted)

        _check_axis(frame, 'index')

        # columns
        frame = frame.T
        _check_axis(frame, 'columns', axis=1)

        # invalid axis
        tm.assert_raises_regex(ValueError, 'axis', frame.to_timestamp, axis=2)

        year_starts = pd.date_range('2001-01-01', '2009-01-01', freq='AS')
        for freq in ('5t', 't'):
            converted = frame.to_timestamp(freq, axis=1)
            assert isinstance(converted.columns, DatetimeIndex)
            tm.assert_numpy_array_equal(converted.columns.asi8,
                                        year_starts.asi8)
            # PeriodIndex.to_timestamp always use 'infer'
            assert converted.columns.freqstr == 'AS-JAN'
示例#2
0
    def test_period_index_length(self):
        """Exercise PeriodIndex construction from start/end/periods and from Periods.

        Covers the resulting lengths, frequency propagation from a ``Period``
        endpoint, and the argument combinations that must raise ValueError.
        """
        # 2001 through 2009 at annual, quarterly and monthly resolution.
        pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 9

        pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 4 * 9

        pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 12 * 9

        # start + periods: freq is taken from the Period.
        start = Period('02-Apr-2005', 'B')
        i1 = PeriodIndex(start=start, periods=20)
        assert len(i1) == 20
        assert i1.freq == start.freq
        assert i1[0] == start

        # end + periods.
        end_intv = Period('2006-12-31', 'W')
        i1 = PeriodIndex(end=end_intv, periods=10)
        assert len(i1) == 10
        assert i1.freq == end_intv.freq
        assert i1[-1] == end_intv

        # Alternative spellings of the weekly frequency give the same index.
        end_intv = Period('2006-12-31', '1w')
        i2 = PeriodIndex(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        end_intv = Period('2006-12-31', ('w', 1))
        i2 = PeriodIndex(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        # Cannot allow mixed freq for start and end
        with pytest.raises(ValueError):
            PeriodIndex(start=start, end=end_intv)

        # Matching frequencies for start and end are accepted.
        end_intv = Period('2005-05-01', 'B')
        i1 = PeriodIndex(start=start, end=end_intv)

        # Must specify periods if missing start or end
        with pytest.raises(ValueError):
            PeriodIndex(start=start)

        # infer freq from first element
        i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
        assert len(i2) == 2
        assert i2[0] == end_intv

        i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
        assert len(i2) == 2
        assert i2[0] == end_intv

        # Mixed freq should fail
        vals = [end_intv, Period('2006-12-31', 'w')]
        with pytest.raises(ValueError):
            PeriodIndex(vals)
        vals = np.array(vals)
        with pytest.raises(ValueError):
            PeriodIndex(vals)
示例#3
0
    def test_shift(self):
        """Shift annual, monthly and daily PeriodIndexes by 0, +1 and -1."""
        base_start, base_end = '1/1/2001', '12/1/2009'

        annual = PeriodIndex(freq='A', start=base_start, end=base_end)
        tm.assert_index_equal(annual.shift(0), annual)

        # (freq, number of periods to shift, expected start, expected end)
        cases = [
            ('A', 1, '1/1/2002', '12/1/2010'),
            ('A', -1, '1/1/2000', '12/1/2008'),
            ('M', 1, '2/1/2001', '1/1/2010'),
            ('M', -1, '12/1/2000', '11/1/2009'),
            ('D', 1, '1/2/2001', '12/2/2009'),
            ('D', -1, '12/31/2000', '11/30/2009'),
        ]
        for freq, n, exp_start, exp_end in cases:
            source = PeriodIndex(freq=freq, start=base_start, end=base_end)
            shifted = PeriodIndex(freq=freq, start=exp_start, end=exp_end)
            assert len(source) == len(shifted)
            tm.assert_index_equal(source.shift(n), shifted)
示例#4
0
    def test_intersection_cases(self, sort):
        """Intersect a PeriodIndex with overlapping, renamed and disjoint inputs.

        ``sort`` is forwarded to ``intersection``; for the non-monotonic
        inputs the expectation is sorted only when ``sort is None``.
        """
        june = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")
        july = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")

        monotonic_cases = [
            # if target has the same name, it is preserved
            (
                period_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
                period_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
            ),
            # if target name is different, it will be reset
            (
                period_range("5/15/2000", "6/20/2000", freq="D",
                             name="other"),
                period_range("6/1/2000", "6/20/2000", freq="D", name=None),
            ),
            # no overlap at all
            (july, PeriodIndex([], name="idx", freq="D")),
        ]
        for other, wanted in monotonic_cases:
            got = june.intersection(other, sort=sort)
            tm.assert_index_equal(got, wanted)
            assert got.name == wanted.name
            assert got.freq == wanted.freq

        # non-monotonic
        shuffled = PeriodIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
            freq="D",
            name="idx",
        )
        other_days = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
        shared_days = ["2011-01-04", "2011-01-02"]

        non_monotonic_cases = [
            (
                PeriodIndex(other_days, freq="D", name="idx"),
                PeriodIndex(shared_days, freq="D", name="idx"),
            ),
            (
                PeriodIndex(other_days, freq="D", name="other"),
                PeriodIndex(shared_days, freq="D", name=None),
            ),
            (july, PeriodIndex([], freq="D", name="idx")),
        ]
        for other, wanted in non_monotonic_cases:
            got = shuffled.intersection(other, sort=sort)
            if sort is None:
                wanted = wanted.sort_values()
            tm.assert_index_equal(got, wanted)
            assert got.name == wanted.name
            assert got.freq == "D"

        # empty same freq
        # NOTE(review): this part builds its input with ``date_range`` while
        # everything above uses period inputs -- confirm that is intended.
        minutes = date_range("6/1/2000", "6/15/2000", freq="T")
        nothing = minutes[0:0]
        assert len(nothing.intersection(minutes)) == 0
        assert len(minutes.intersection(nothing)) == 0
示例#5
0
            ["datetime64[ns, Asia/Tokyo]", "timedelta"],
        ),
    ],
)
def test_get_dtype_kinds(index_or_series, to_concat, expected):
    # Wrap each raw input with the parametrized constructor, then compare the
    # reported kinds with the expected ones as a set.
    wrapped = list(map(index_or_series, to_concat))
    kinds = _concat._get_dtype_kinds(wrapped)
    assert kinds == set(expected)


@pytest.mark.parametrize(
    "to_concat, expected",
    [
        (
            [
                PeriodIndex(["2011-01"], freq="M"),
                PeriodIndex(["2011-01"], freq="M")
            ],
            ["period[M]"],
        ),
        (
            [
                Series([Period("2011-01", freq="M")]),
                Series([Period("2011-02", freq="M")]),
            ],
            ["period[M]"],
        ),
        (
            [
                PeriodIndex(["2011-01"], freq="M"),
                PeriodIndex(["2011-01"], freq="D")
示例#6
0
 def test_repeat_freqstr(self, index, use_numpy):
     """Repeating every element three times keeps element order and freqstr."""
     # GH10183
     tripled = []
     for period in index:
         tripled.extend([period] * 3)
     expected = PeriodIndex(tripled)

     if use_numpy:
         result = np.repeat(index, 3)
     else:
         result = index.repeat(3)

     tm.assert_index_equal(result, expected)
     assert result.freqstr == index.freqstr
示例#7
0
 def test_astype_raises(self, dtype):
     """Casting a PeriodIndex to the parametrized ``dtype`` raises TypeError."""
     # GH#13149, GH#13209
     # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0, while
     # ``np.nan`` is available in every NumPy version.
     idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.nan], freq="D")
     msg = "Cannot cast PeriodIndex to dtype"
     with pytest.raises(TypeError, match=msg):
         idx.astype(dtype)
示例#8
0
 def test_recreate_from_data(self, freq):
     """A PeriodIndex rebuilt from another index's ``.values`` equals it."""
     original = period_range(start="2001/04/01", freq=freq, periods=1)
     rebuilt = PeriodIndex(original.values, freq=freq)
     tm.assert_index_equal(rebuilt, original)
示例#9
0
    result = arr.to_numpy(na_value=arr[1].to_numpy())
    assert result[0] == result[1]

    result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False))
    assert result[0] == result[1]

    tm.assert_equal(arr, original)


@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "values",
    [
        pd.to_datetime(["2020-01-01", "2020-02-01"]),
        TimedeltaIndex([1, 2], unit="D"),
        PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"),
    ],
)
@pytest.mark.parametrize(
    "klass",
    [
        list,
        np.array,
        pd.array,
        pd.Series,
        pd.Index,
        pd.Categorical,
        pd.CategoricalIndex,
    ],
)
def test_searchsorted_datetimelike_with_listlike(values, klass, as_index):
示例#10
0
    def test_constructor_pi_nat(self):
        """Build PeriodIndexes from Period/NaT mixtures, as lists and arrays.

        Inputs containing only missing values and no ``freq`` must raise
        ValueError.
        """
        jan = Period("2011-01", freq="M")

        # NaT in the middle
        wanted = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
        nat_inside = [jan, NaT, jan]
        for data in (nat_inside, np.array(nat_inside)):
            tm.assert_index_equal(PeriodIndex(data), wanted)

        # NaT first
        wanted = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M")
        nat_first = [NaT, NaT, jan, jan]
        for data in (nat_first, np.array(nat_first)):
            tm.assert_index_equal(PeriodIndex(data), wanted)

        # strings plus an explicit freq
        from_strings = PeriodIndex([NaT, NaT, "2011-01", "2011-01"], freq="M")
        tm.assert_index_equal(from_strings, wanted)

        # nothing to take a freq from
        all_missing = [
            [NaT, NaT],
            np.array([NaT, NaT]),
            ["NaT", "NaT"],
            np.array(["NaT", "NaT"]),
        ]
        for data in all_missing:
            with pytest.raises(ValueError, match="freq not specified"):
                PeriodIndex(data)
示例#11
0
    def test_constructor(self):
        """Check ``period_range`` lengths per frequency and Period-based construction."""
        # (freq, start, end, expected number of periods)
        length_cases = [
            ("A", "1/1/2001", "12/1/2009", 9),
            ("Q", "1/1/2001", "12/1/2009", 4 * 9),
            ("M", "1/1/2001", "12/1/2009", 12 * 9),
            ("D", "1/1/2001", "12/31/2009", 365 * 9 + 2),
            ("B", "1/1/2001", "12/31/2009", 261 * 9),
            ("H", "1/1/2001", "12/31/2001 23:00", 365 * 24),
            ("Min", "1/1/2001", "1/1/2001 23:59", 24 * 60),
            ("S", "1/1/2001", "1/1/2001 23:59:59", 24 * 60 * 60),
        ]
        for freq, lo, hi, n_expected in length_cases:
            pi = period_range(freq=freq, start=lo, end=hi)
            assert len(pi) == n_expected

        # start + periods
        first = Period("02-Apr-2005", "B")
        forward = period_range(start=first, periods=20)
        assert len(forward) == 20
        assert forward.freq == first.freq
        assert forward[0] == first

        # end + periods
        last = Period("2006-12-31", "W")
        weekly = period_range(end=last, periods=10)
        assert len(weekly) == 10
        assert weekly.freq == last.freq
        assert weekly[-1] == last

        # other spellings of the weekly frequency produce the same range
        for spelling in ("1w", ("w", 1)):
            last = Period("2006-12-31", spelling)
            again = period_range(end=last, periods=10)
            assert len(weekly) == len(again)
            assert (weekly == again).all()
            assert weekly.freq == again.freq

        # start and end with the same freq; the result itself is not inspected
        last = Period("2005-05-01", "B")
        period_range(start=first, end=last)

        # infer freq from first element
        pair = [last, Period("2005-05-05", "B")]
        for data in (pair, np.array(pair)):
            inferred = PeriodIndex(data)
            assert len(inferred) == 2
            assert inferred[0] == last

        # Mixed freq should fail
        msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
        mixed = [last, Period("2006-12-31", "w")]
        mixed_arr = np.array(mixed)
        for data in (mixed, mixed_arr):
            with pytest.raises(IncompatibleFrequency, match=msg):
                PeriodIndex(data)
示例#12
0
    def test_constructor_fromarraylike(self):
        """Construct a PeriodIndex from arrays, lists, iterators and other indexes."""
        monthly = period_range("2007-01", periods=20, freq="M")

        # values is an array of Period, thus can retrieve freq
        for boxed in (monthly.values, list(monthly.values)):
            tm.assert_index_equal(PeriodIndex(boxed), monthly)

        # ``_ndarray_values`` without a freq is rejected
        msg = "freq not specified and cannot be inferred"
        for raw in (monthly._ndarray_values, list(monthly._ndarray_values)):
            with pytest.raises(ValueError, match=msg):
                PeriodIndex(raw)

        # a scalar Period is not valid ``data``
        msg = "'Period' object is not iterable"
        with pytest.raises(TypeError, match=msg):
            PeriodIndex(data=Period("2007", freq="A"))

        tm.assert_index_equal(PeriodIndex(iter(monthly)), monthly)
        tm.assert_index_equal(PeriodIndex(monthly), monthly)
        tm.assert_index_equal(PeriodIndex(monthly, freq="M"), monthly)

        rebuilt = PeriodIndex(monthly, freq=offsets.MonthEnd())
        tm.assert_index_equal(rebuilt, monthly)
        assert rebuilt.freq == "M"

        # the two-month freq given as a string and as an offset object
        for two_month in ("2M", offsets.MonthEnd(2)):
            rebuilt = PeriodIndex(monthly, freq=two_month)
            tm.assert_index_equal(rebuilt, monthly.asfreq("2M"))
            assert rebuilt.freq == "2M"

        daily = PeriodIndex(monthly, freq="D")
        tm.assert_index_equal(daily, monthly.asfreq("D", "e"))
示例#13
0
 def test_constructor_invalid_quarters(self):
     """Quarter values starting at 0 are rejected by the year/quarter constructor."""
     years = range(2000, 2004)
     quarters = list(range(4))  # 0..3, so the first value is below 1
     msg = "Quarter must be 1 <= q <= 4"
     with pytest.raises(ValueError, match=msg):
         PeriodIndex(year=years, quarter=quarters, freq="Q-DEC")
示例#14
0
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence to encode
    sort : boolean, default False
        Sort the uniques (and remap the labels accordingly)
    order : default None
        Accepted for signature compatibility; not referenced in the body
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer
        When falsy, ``len`` of the converted values is used instead

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    from pandas import Index, Series, DatetimeIndex, PeriodIndex

    # Datetimelike inputs are hashed through their int64 representation:
    # - period / tz-aware values are boxed into an Index first, so the
    #   uniques can be rebuilt from it at the end and keep their metadata
    # - plain numpy datetimelikes are viewed as int64 and cast back later
    restore_dtype = None
    if not needs_i8_conversion(values):
        raw = np.asarray(values)
    elif is_period_dtype(values):
        values = PeriodIndex(values)
        raw = values.asi8
    elif is_datetimetz(values):
        values = DatetimeIndex(values)
        raw = values.asi8
    else:
        # numpy dtype
        restore_dtype = values.dtype
        raw = values.view(np.int64)

    (hash_klass, vec_klass), raw = _get_data_algo(raw, _hashtables)

    table = hash_klass(size_hint or len(raw))
    collected = vec_klass()
    # Null checking is skipped only for integer-dtype input.
    labels = table.get_labels(raw, collected, 0, na_sentinel,
                              not is_integer_dtype(values))
    labels = _ensure_platform_int(labels)

    uniques = collected.to_array()

    if sort and len(uniques) > 0:
        uniques, labels = safe_sort(
            uniques, labels, na_sentinel=na_sentinel, assume_unique=True)

    if restore_dtype is not None:
        uniques = uniques.astype(restore_dtype)

    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)

    return labels, uniques
示例#15
0
    def test_asfreq(self):
        """Convert single-period indexes between every pair of frequencies.

        Each index holds one period starting at 1/1/2001; converting any of
        them to another frequency anchored at the start must give the index
        built directly at that frequency.
        """
        # (freq, end string used to build the index at that freq)
        specs = [
            ('A', '1/1/2001'),
            ('Q', '1/1/2001'),
            ('M', '1/1/2001'),
            ('D', '1/1/2001'),
            ('H', '1/1/2001 00:00'),
            ('Min', '1/1/2001 00:00'),
            ('S', '1/1/2001 00:00:00'),
        ]
        by_freq = [
            (freq, PeriodIndex(freq=freq, start='1/1/2001', end=end))
            for freq, end in specs
        ]
        annual = by_freq[0][1]
        secondly = by_freq[-1][1]

        # The annual index is converted with several spellings of ``how``;
        # quarterly is checked twice ('S' and 's').
        spellings = ['S', 's', 'start', 'StarT', 'beGIN', 'S', 'S']
        targets = [by_freq[1]] + by_freq[1:]
        for how, (freq, wanted) in zip(spellings, targets):
            assert annual.asfreq(freq, how) == wanted

        # Every other index is converted to each frequency except its own.
        for pos in range(1, len(by_freq)):
            source = by_freq[pos][1]
            for other_pos, (freq, wanted) in enumerate(by_freq):
                if other_pos == pos:
                    continue
                assert source.asfreq(freq, 'S') == wanted

        pytest.raises(ValueError, secondly.asfreq, 'T', 'foo')

        # Without ``how`` the annual period maps to December 2001; a
        # multiplied freq keeps the same ordinals but reports its own freqstr.
        december = PeriodIndex(freq='M', start='2001-12', end='2001-12')
        for freq in ('3M', 'M'):
            converted = annual.asfreq(freq)
            tm.assert_numpy_array_equal(converted.asi8, december.asi8)
            assert converted.freqstr == freq
示例#16
0
class TestPeriodArray(SharedTests):
    """PeriodArray cases that extend ``SharedTests``."""

    index_cls = PeriodIndex
    array_cls = PeriodArray
    scalar_type = Period
    example_dtype = PeriodIndex([], freq="W").dtype

    @pytest.fixture
    def arr1d(self, period_index):
        # The array stored on the ``period_index`` fixture.
        return period_index._data

    def test_from_pi(self, arr1d):
        """The array iterates like the index built from it, and ``pd.Index`` boxes it."""
        as_index = self.index_cls(arr1d)
        assert list(arr1d) == list(as_index)

        # Check that Index.__new__ knows what to do with PeriodArray
        boxed = pd.Index(arr1d)
        assert isinstance(boxed, PeriodIndex)
        assert list(boxed) == list(arr1d)

    def test_astype_object(self, arr1d):
        """``astype("O")`` yields an object ndarray with the same elements."""
        as_index = self.index_cls(arr1d)
        as_objects = arr1d.astype("O")
        assert isinstance(as_objects, np.ndarray)
        assert as_objects.dtype == "O"
        assert list(as_objects) == list(as_index)

    def test_take_fill_valid(self, arr1d):
        """``take`` rejects fill values that are not a Period or ``NaT``."""
        msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
        bad_fills = [
            # require NaT, not iNaT, as it could be confused with an integer
            NaT.value,
            # require appropriate-dtype if we have a NA value
            np.timedelta64("NaT", "ns"),
        ]
        for fill in bad_fills:
            with pytest.raises(TypeError, match=msg):
                arr1d.take([-1, 1], allow_fill=True, fill_value=fill)

    @pytest.mark.parametrize("how", ["S", "E"])
    def test_to_timestamp(self, how, arr1d):
        """Array-level ``to_timestamp`` agrees with the index-level method."""
        as_index = self.index_cls(arr1d)

        wanted = DatetimeArray(as_index.to_timestamp(how=how))
        got = arr1d.to_timestamp(how=how)
        assert isinstance(got, DatetimeArray)

        # placeholder until these become actual EA subclasses and we can use
        #  an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(got), pd.Index(wanted))

    def test_to_timestamp_out_of_bounds(self):
        """Year-1500 periods raise instead of producing timestamps."""
        # GH#19643 previously overflowed silently
        old_years = pd.period_range("1500", freq="Y", periods=3)
        msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"
        for convert in (old_years.to_timestamp, old_years._data.to_timestamp):
            with pytest.raises(OutOfBoundsDatetime, match=msg):
                convert()

    @pytest.mark.parametrize("propname", PeriodArray._bool_ops)
    def test_bool_properties(self, arr1d, propname):
        """Each boolean property matches the one on the equivalent index."""
        # in this case _bool_ops is just `is_leap_year`
        as_index = self.index_cls(arr1d)

        got = getattr(arr1d, propname)
        wanted = np.array(getattr(as_index, propname))

        tm.assert_numpy_array_equal(got, wanted)

    @pytest.mark.parametrize("propname", PeriodArray._field_ops)
    def test_int_properties(self, arr1d, propname):
        """Each field property matches the one on the equivalent index."""
        as_index = self.index_cls(arr1d)

        got = getattr(arr1d, propname)
        wanted = np.array(getattr(as_index, propname))

        tm.assert_numpy_array_equal(got, wanted)

    def test_array_interface(self, arr1d):
        """``np.asarray`` conversions: object by default, int64, float64, bytes."""
        # default asarray gives objects
        as_objects = np.array(list(arr1d), dtype=object)
        tm.assert_numpy_array_equal(np.asarray(arr1d), as_objects)

        # to object dtype (same as default)
        tm.assert_numpy_array_equal(np.asarray(arr1d, dtype=object),
                                    as_objects)

        tm.assert_numpy_array_equal(np.asarray(arr1d, dtype="int64"),
                                    arr1d.asi8)

        # to other dtypes
        msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
        with pytest.raises(TypeError, match=msg):
            np.asarray(arr1d, dtype="float64")

        as_bytes = np.asarray(arr1d, dtype="S20")
        wanted = np.asarray(arr1d).astype("S20")
        tm.assert_numpy_array_equal(as_bytes, wanted)

    def test_strftime(self, arr1d):
        """Array ``strftime`` equals formatting each element individually."""
        got = arr1d.strftime("%Y")
        per_element = [period.strftime("%Y") for period in arr1d]
        wanted = np.array(per_element, dtype=object)
        tm.assert_numpy_array_equal(got, wanted)

    def test_strftime_nat(self):
        """A ``NaT`` entry formats to ``np.nan``."""
        # GH 29578
        with_nat = PeriodIndex(["2019-01-01", NaT], dtype="period[D]")
        arr = PeriodArray(with_nat)

        got = arr.strftime("%Y-%m-%d")
        wanted = np.array(["2019-01-01", np.nan], dtype=object)
        tm.assert_numpy_array_equal(got, wanted)
示例#17
0
    def test_period_index_length(self):
        """Check ``period_range`` lengths, freq propagation and argument errors."""
        # (freq, expected number of periods between 1/1/2001 and 12/1/2009)
        for freq, n_expected in [('A', 9), ('Q', 4 * 9), ('M', 12 * 9)]:
            pi = period_range(freq=freq, start='1/1/2001', end='12/1/2009')
            assert len(pi) == n_expected

        # start + periods
        first = Period('02-Apr-2005', 'B')
        forward = period_range(start=first, periods=20)
        assert len(forward) == 20
        assert forward.freq == first.freq
        assert forward[0] == first

        # end + periods
        last = Period('2006-12-31', 'W')
        weekly = period_range(end=last, periods=10)
        assert len(weekly) == 10
        assert weekly.freq == last.freq
        assert weekly[-1] == last

        # other spellings of the weekly frequency produce the same range
        for spelling in ('1w', ('w', 1)):
            last = Period('2006-12-31', spelling)
            again = period_range(end=last, periods=10)
            assert len(weekly) == len(again)
            assert (weekly == again).all()
            assert weekly.freq == again.freq

        # ``first`` is business-day, ``last`` is still weekly here
        msg = "start and end must have same freq"
        with pytest.raises(ValueError, match=msg):
            period_range(start=first, end=last)

        # same freq on both ends; the result itself is not inspected
        last = Period('2005-05-01', 'B')
        period_range(start=first, end=last)

        msg = ("Of the three parameters: start, end, and periods, exactly two"
               " must be specified")
        with pytest.raises(ValueError, match=msg):
            period_range(start=first)

        # infer freq from first element
        pair = [last, Period('2005-05-05', 'B')]
        for data in (pair, np.array(pair)):
            inferred = PeriodIndex(data)
            assert len(inferred) == 2
            assert inferred[0] == last

        # Mixed freq should fail
        msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
        mixed = [last, Period('2006-12-31', 'w')]
        mixed_arr = np.array(mixed)
        with pytest.raises(IncompatibleFrequency, match=msg):
            PeriodIndex(mixed)
        with pytest.raises(ValueError, match=msg):
            PeriodIndex(mixed_arr)
示例#18
0
    def test_constructor_pi_nat(self):
        """Build PeriodIndexes from Period/NaT mixtures, as lists and arrays.

        Inputs containing only missing values and no ``freq`` must raise
        ValueError.
        """
        jan = Period('2011-01', freq='M')

        # NaT in the middle
        wanted = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
        nat_inside = [jan, pd.NaT, jan]
        for data in (nat_inside, np.array(nat_inside)):
            tm.assert_index_equal(PeriodIndex(data), wanted)

        # NaT first
        wanted = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
        nat_first = [pd.NaT, pd.NaT, jan, jan]
        for data in (nat_first, np.array(nat_first)):
            tm.assert_index_equal(PeriodIndex(data), wanted)

        # strings plus an explicit freq
        from_strings = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'],
                                   freq='M')
        tm.assert_index_equal(from_strings, wanted)

        # nothing to take a freq from
        all_missing = [
            [pd.NaT, pd.NaT],
            np.array([pd.NaT, pd.NaT]),
            ['NaT', 'NaT'],
            np.array(['NaT', 'NaT']),
        ]
        for data in all_missing:
            with tm.assert_raises_regex(ValueError, 'freq not specified'):
                PeriodIndex(data)
示例#19
0
 def test_pindex_qaccess(self):
     """Look up a quarter by its string label on a quarterly PeriodIndex."""
     pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')
     s = Series(np.random.rand(len(pi)), index=pi).cumsum()
     # Todo: fix these accessors!
     # '4Q05' is the third label, so compare with position 2 through
     # ``.iloc``; a bare ``s[2]`` depends on ``Series.__getitem__`` falling
     # back to positional lookup for an integer key on a non-integer index.
     assert s['05Q4'] == s.iloc[2]
示例#20
0
 def test_constructor_simple_new_empty(self):
     """``_simple_new`` on an empty, named monthly index returns an equal index."""
     # GH13079
     empty = PeriodIndex([], freq='M', name='p')
     rebuilt = empty._simple_new(empty, name='p', freq='M')
     tm.assert_index_equal(rebuilt, empty)
示例#21
0
    def test_union(self, sort):
        # union
        other1 = period_range("1/1/2000", freq="D", periods=5)
        rng1 = period_range("1/6/2000", freq="D", periods=5)
        expected1 = PeriodIndex(
            [
                "2000-01-06",
                "2000-01-07",
                "2000-01-08",
                "2000-01-09",
                "2000-01-10",
                "2000-01-01",
                "2000-01-02",
                "2000-01-03",
                "2000-01-04",
                "2000-01-05",
            ],
            freq="D",
        )

        rng2 = period_range("1/1/2000", freq="D", periods=5)
        other2 = period_range("1/4/2000", freq="D", periods=5)
        expected2 = period_range("1/1/2000", freq="D", periods=8)

        rng3 = period_range("1/1/2000", freq="D", periods=5)
        other3 = PeriodIndex([], freq="D")
        expected3 = period_range("1/1/2000", freq="D", periods=5)

        rng4 = period_range("2000-01-01 09:00", freq="H", periods=5)
        other4 = period_range("2000-01-02 09:00", freq="H", periods=5)
        expected4 = PeriodIndex(
            [
                "2000-01-01 09:00",
                "2000-01-01 10:00",
                "2000-01-01 11:00",
                "2000-01-01 12:00",
                "2000-01-01 13:00",
                "2000-01-02 09:00",
                "2000-01-02 10:00",
                "2000-01-02 11:00",
                "2000-01-02 12:00",
                "2000-01-02 13:00",
            ],
            freq="H",
        )

        rng5 = PeriodIndex(
            ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"],
            freq="T")
        other5 = PeriodIndex(
            ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"],
            freq="T")
        expected5 = PeriodIndex(
            [
                "2000-01-01 09:01",
                "2000-01-01 09:03",
                "2000-01-01 09:05",
                "2000-01-01 09:08",
            ],
            freq="T",
        )

        rng6 = period_range("2000-01-01", freq="M", periods=7)
        other6 = period_range("2000-04-01", freq="M", periods=7)
        expected6 = period_range("2000-01-01", freq="M", periods=10)

        rng7 = period_range("2003-01-01", freq="A", periods=5)
        other7 = period_range("1998-01-01", freq="A", periods=8)
        expected7 = PeriodIndex(
            [
                "2003",
                "2004",
                "2005",
                "2006",
                "2007",
                "1998",
                "1999",
                "2000",
                "2001",
                "2002",
            ],
            freq="A",
        )

        rng8 = PeriodIndex(
            ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"],
            freq="D")
        other8 = period_range("1/6/2000", freq="D", periods=5)
        expected8 = PeriodIndex(
            [
                "1/3/2000",
                "1/2/2000",
                "1/1/2000",
                "1/5/2000",
                "1/4/2000",
                "1/6/2000",
                "1/7/2000",
                "1/8/2000",
                "1/9/2000",
                "1/10/2000",
            ],
            freq="D",
        )

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
            (rng4, other4, expected4),
            (rng5, other5, expected5),
            (rng6, other6, expected6),
            (rng7, other7, expected7),
            (rng8, other8, expected8),
        ]:

            result_union = rng.union(other, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result_union, expected)
示例#22
0
 def test_constructor_use_start_freq(self):
     # GH #1118
     p = Period('4/2/2012', freq='B')
     index = PeriodIndex(start=p, periods=10)
     expected = PeriodIndex(start='4/2/2012', periods=10, freq='B')
     tm.assert_index_equal(index, expected)
示例#23
0
    def test_difference(self, sort):
        # diff
        period_rng = [
            "1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"
        ]
        rng1 = PeriodIndex(period_rng, freq="D")
        other1 = period_range("1/6/2000", freq="D", periods=5)
        expected1 = rng1

        rng2 = PeriodIndex(period_rng, freq="D")
        other2 = period_range("1/4/2000", freq="D", periods=5)
        expected2 = PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D")

        rng3 = PeriodIndex(period_rng, freq="D")
        other3 = PeriodIndex([], freq="D")
        expected3 = rng3

        period_rng = [
            "2000-01-01 10:00",
            "2000-01-01 09:00",
            "2000-01-01 12:00",
            "2000-01-01 11:00",
            "2000-01-01 13:00",
        ]
        rng4 = PeriodIndex(period_rng, freq="H")
        other4 = period_range("2000-01-02 09:00", freq="H", periods=5)
        expected4 = rng4

        rng5 = PeriodIndex(
            ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"],
            freq="T")
        other5 = PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"],
                             freq="T")
        expected5 = PeriodIndex(["2000-01-01 09:03"], freq="T")

        period_rng = [
            "2000-02-01",
            "2000-01-01",
            "2000-06-01",
            "2000-07-01",
            "2000-05-01",
            "2000-03-01",
            "2000-04-01",
        ]
        rng6 = PeriodIndex(period_rng, freq="M")
        other6 = period_range("2000-04-01", freq="M", periods=7)
        expected6 = PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"],
                                freq="M")

        period_rng = ["2003", "2007", "2006", "2005", "2004"]
        rng7 = PeriodIndex(period_rng, freq="A")
        other7 = period_range("1998-01-01", freq="A", periods=8)
        expected7 = PeriodIndex(["2007", "2006"], freq="A")

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
            (rng4, other4, expected4),
            (rng5, other5, expected5),
            (rng6, other6, expected6),
            (rng7, other7, expected7),
        ]:
            result_difference = rng.difference(other, sort=sort)
            if sort is None and len(other):
                # We dont sort (yet?) when empty GH#24959
                expected = expected.sort_values()
            tm.assert_index_equal(result_difference, expected)
示例#24
0
    def test_constructor(self):
        """Exercise range-style and list-style construction of period indexes.

        Covers: the length of ranges at several frequencies; ranges anchored
        on a ``Period`` taking its frequency and endpoint; equivalent weekly
        frequency spellings producing equal indexes; frequency inference
        from a list or array of ``Period`` objects; and ``ValueError`` for
        mixed frequencies.
        """
        # Ranges are built with period_range: the start/end/periods keyword
        # form of the PeriodIndex constructor was deprecated in pandas 0.24
        # and removed in 1.0.
        pi = period_range(freq='A', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 9

        pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 4 * 9

        pi = period_range(freq='M', start='1/1/2001', end='12/1/2009')
        assert len(pi) == 12 * 9

        pi = period_range(freq='D', start='1/1/2001', end='12/31/2009')
        # nine years, two of which (2004, 2008) are leap years
        assert len(pi) == 365 * 9 + 2

        pi = period_range(freq='B', start='1/1/2001', end='12/31/2009')
        assert len(pi) == 261 * 9

        pi = period_range(freq='H', start='1/1/2001', end='12/31/2001 23:00')
        assert len(pi) == 365 * 24

        pi = period_range(freq='Min', start='1/1/2001', end='1/1/2001 23:59')
        assert len(pi) == 24 * 60

        pi = period_range(freq='S', start='1/1/2001', end='1/1/2001 23:59:59')
        assert len(pi) == 24 * 60 * 60

        # Anchored on a Period at the start: freq and first element follow it.
        start = Period('02-Apr-2005', 'B')
        i1 = period_range(start=start, periods=20)
        assert len(i1) == 20
        assert i1.freq == start.freq
        assert i1[0] == start

        # Anchored on a Period at the end: freq and last element follow it.
        end_intv = Period('2006-12-31', 'W')
        i1 = period_range(end=end_intv, periods=10)
        assert len(i1) == 10
        assert i1.freq == end_intv.freq
        assert i1[-1] == end_intv

        # Alternative spellings of the weekly frequency give the same index.
        end_intv = Period('2006-12-31', '1w')
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        # NOTE(review): tuple-form freq; newer pandas releases may no longer
        # accept it -- confirm against the pandas version this suite targets.
        end_intv = Period('2006-12-31', ('w', 1))
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        end_intv = Period('2005-05-01', 'B')
        # Construction only: nothing is asserted about this range beyond the
        # fact that building it from two Periods does not raise.
        period_range(start=start, end=end_intv)

        # infer freq from first element
        i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
        assert len(i2) == 2
        assert i2[0] == end_intv

        i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
        assert len(i2) == 2
        assert i2[0] == end_intv

        # Mixed freq should fail
        vals = [end_intv, Period('2006-12-31', 'w')]
        with pytest.raises(ValueError):
            PeriodIndex(vals)
        vals = np.array(vals)
        with pytest.raises(ValueError):
            PeriodIndex(vals)
示例#25
0
 def test_make_time_series(self):
     index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
     series = Series(1, index=index)
     assert isinstance(series, Series)
示例#26
0
 def test_recreate_from_data(self):
     for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']:
         org = PeriodIndex(start='2001/04/01', freq=o, periods=1)
         idx = PeriodIndex(org.values, freq=o)
         tm.assert_index_equal(idx, org)
示例#27
0
 def test_end_time(self):
     index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31')
     expected_index = date_range('2016-01-01', end='2016-05-31', freq='M')
     tm.assert_index_equal(index.end_time, expected_index)
示例#28
0
 def test_asfreq_nat(self):
     idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M')
     result = idx.asfreq(freq='Q')
     expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q')
     tm.assert_index_equal(result, expected)
示例#29
0
 def test_pickle_round_trip(self):
     for freq in ['D', 'M', 'A']:
         idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq)
         result = tm.round_trip_pickle(idx)
         tm.assert_index_equal(result, idx)
示例#30
0
    def test_frame_index_to_string(self):
        index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M')
        frame = DataFrame(np.random.randn(3, 4), index=index)

        # it works!
        frame.to_string()