Пример #1
0
 def test_pi_sub_pdnat(self):
     # GH#13071
     idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
                       freq='M', name='idx')
     exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx')
     tm.assert_index_equal(pd.NaT - idx, exp)
     tm.assert_index_equal(idx - pd.NaT, exp)
Пример #2
0
    def test_inf_upcast(self):
        """Check that np.inf works as a label and turns the axis into floats."""
        # GH 16957

        # np.inf used as a row label
        frame = DataFrame(columns=[0])
        for label, value in ((1, 1), (2, 2), (np.inf, 3)):
            frame.loc[label] = value

        # the value stored under the inf label must be retrievable
        assert frame.loc[np.inf, 0] == 3

        row_labels = frame.index
        tm.assert_index_equal(row_labels, pd.Float64Index([1, 2, np.inf]))

        # np.inf used as a column label
        frame = DataFrame()
        for row, col, value in ((0, 0, 1), (1, 1, 2), (0, np.inf, 3)):
            frame.loc[row, col] = value

        col_labels = frame.columns
        tm.assert_index_equal(col_labels, pd.Float64Index([0, 1, np.inf]))
Пример #3
0
    def test_roundtrip_pickle_with_tz(self):

        # GH 8367
        # round-trip of timezone
        index = date_range('20130101', periods=3, tz='US/Eastern', name='foo')
        unpickled = tm.round_trip_pickle(index)
        tm.assert_index_equal(index, unpickled)
Пример #4
0
    def test_pi_add_offset_array(self, box):
        """Add an array of offsets to a PeriodIndex, in both operand orders.

        ``box`` is the container constructor applied to the list of
        compatible offsets.
        """
        # GH#18849
        periods = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
        quarter_offsets = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
                               pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
        expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])

        with tm.assert_produces_warning(PerformanceWarning):
            forward = periods + quarter_offsets
        tm.assert_index_equal(forward, expected)

        with tm.assert_produces_warning(PerformanceWarning):
            backward = quarter_offsets + periods
        tm.assert_index_equal(backward, expected)

        unanchored = np.array([pd.offsets.Hour(n=1),
                               pd.offsets.Minute(n=-2)])
        # Addition with incompatible offsets should issue a
        # PerformanceWarning and _then_ raise IncompatibleFrequency,
        # whichever side the offsets are on.
        for lhs, rhs in ((periods, unanchored), (unanchored, periods)):
            with pytest.raises(IncompatibleFrequency):
                with tm.assert_produces_warning(PerformanceWarning):
                    lhs + rhs
Пример #5
0
    def test_map_bug_1677(self):
        index = DatetimeIndex(['2012-04-25 09:30:00.393000'])
        f = index.asof

        result = index.map(f)
        expected = Index([f(index[0])])
        tm.assert_index_equal(result, expected)
Пример #6
0
    def test_get_duplicates(self):
        """Check that ``get_duplicates`` returns each repeated date once."""
        dates = ['2000-01-01', '2000-01-02', '2000-01-02',
                 '2000-01-03', '2000-01-03', '2000-01-04']
        idx = DatetimeIndex(dates)

        duplicated = idx.get_duplicates()
        tm.assert_index_equal(duplicated,
                              DatetimeIndex(['2000-01-02', '2000-01-03']))
Пример #7
0
    def test_map(self):
        rng = date_range('1/1/2000', periods=10)

        f = lambda x: x.strftime('%Y%m%d')
        result = rng.map(f)
        exp = Index([f(x) for x in rng], dtype='<U8')
        tm.assert_index_equal(result, exp)
Пример #8
0
 def check_coerce(self, a, b, is_float_index=True):
     """Assert ``a`` equals ``b`` and that ``b`` has the expected index kind.

     With ``is_float_index`` true, ``b`` must be a ``Float64Index``;
     otherwise ``b`` is handed to ``self.check_is_index``.
     """
     assert a.equals(b)
     tm.assert_index_equal(a, b, exact=False)
     if not is_float_index:
         self.check_is_index(b)
         return
     assert isinstance(b, Float64Index)
Пример #9
0
    def test_take_fill_value(self):
        """Exercise ``take`` with ``fill_value`` on ``self._holder`` indexes.

        The holder type is expected to reject ``fill_value`` with a
        ValueError unless ``allow_fill=False`` is passed.
        """
        # see gh-12631
        index = self._holder([1, 2, 3], name='xxx')

        # plain take: -1 selects the last element
        taken = index.take(np.array([1, 0, -1]))
        tm.assert_index_equal(taken, self._holder([2, 1, 3], name='xxx'))

        holder_name = self._holder.__name__
        msg = ("Unable to fill values because "
               "{name} cannot contain NA").format(name=holder_name)

        # fill_value=True
        with pytest.raises(ValueError, match=msg):
            index.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        taken = index.take(np.array([1, 0, -1]), allow_fill=False,
                           fill_value=True)
        tm.assert_index_equal(taken, self._holder([2, 1, 3], name='xxx'))

        # other negative positions combined with fill_value also raise
        for position in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                index.take(np.array([1, 0, position]), fill_value=True)

        with pytest.raises(IndexError):
            index.take(np.array([1, -5]))
Пример #10
0
 def test_begin_year_alias(self, freq):
     # see gh-9313
     rng = date_range("1/1/2013", "7/1/2017", freq=freq)
     exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01",
                             "2015-01-01", "2016-01-01",
                             "2017-01-01"], freq=freq)
     tm.assert_index_equal(rng, exp)
Пример #11
0
 def _assert_insert_conversion(self, original, value,
                               expected, expected_dtype):
     """ test coercion triggered by insert """
     target = original.copy()
     res = target.insert(1, value)
     tm.assert_index_equal(res, expected)
     assert res.dtype == expected_dtype
Пример #12
0
 def test_constructor_interval(self):
     result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)],
                          ordered=True)
     ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
     exp = Categorical(ii, ordered=True)
     tm.assert_categorical_equal(result, exp)
     tm.assert_index_equal(result.categories, ii)
Пример #13
0
 def test_update_dtype_string(self, ordered):
     dtype = CategoricalDtype(list('abc'), ordered)
     expected_categories = dtype.categories
     expected_ordered = dtype.ordered
     result = dtype.update_dtype('category')
     tm.assert_index_equal(result.categories, expected_categories)
     assert result.ordered is expected_ordered
Пример #14
0
    def test_constructor_from_index_series_timedelta(self):
        idx = timedelta_range('1 days', freq='D', periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)
Пример #15
0
    def test_constructor_from_index_series_period(self):
        idx = period_range('2015-01-01', freq='D', periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)
Пример #16
0
    def test_combineFrame(self, float_frame, mixed_float_frame,
                          mixed_int_frame):
        """Exercise DataFrame + DataFrame addition on the supplied frames.

        Covers adding a frame to a reindexed subset of itself (partial
        row and column overlap), adding a frame to itself, empty operands,
        reversed column order, and the result dtypes when the frame
        fixtures are combined.
        """
        # Every other row of float_frame, without column 'D' and with the
        # first five values of 'C' blanked out.
        frame_copy = float_frame.reindex(float_frame.index[::2])

        del frame_copy['D']
        frame_copy['C'][:5] = np.nan

        added = float_frame + frame_copy

        # Rows where 'A' is not NaN in the sum should hold twice the
        # original value.
        indexer = added['A'].dropna().index
        exp = (float_frame['A'] * 2).copy()

        tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer])

        # All other rows of 'A' are expected to be NaN.
        exp.loc[~exp.index.isin(indexer)] = np.nan
        tm.assert_series_equal(added['A'], exp.loc[added['A'].index])

        assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()

        # 'D' was deleted from frame_copy, so the sum has only NaN there.
        assert np.isnan(added['D']).all()

        self_added = float_frame + float_frame
        tm.assert_index_equal(self_added.index, float_frame.index)

        # Same expectation for 'D' with the operands swapped.
        added_rev = frame_copy + float_frame
        assert np.isnan(added['D']).all()
        assert np.isnan(added_rev['D']).all()

        # corner cases

        # empty
        plus_empty = float_frame + DataFrame()
        assert np.isnan(plus_empty.values).all()

        empty_plus = DataFrame() + float_frame
        assert np.isnan(empty_plus.values).all()

        empty_empty = DataFrame() + DataFrame()
        assert empty_empty.empty

        # out of order
        reverse = float_frame.reindex(columns=float_frame.columns[::-1])

        assert_frame_equal(reverse + float_frame, float_frame * 2)

        # mix vs float64, upcast
        added = float_frame + mixed_float_frame
        _check_mixed_float(added, dtype='float64')
        added = mixed_float_frame + float_frame
        _check_mixed_float(added, dtype='float64')

        # mix vs mix
        added = mixed_float_frame + mixed_float_frame
        _check_mixed_float(added, dtype=dict(C=None))

        # with int
        added = float_frame + mixed_int_frame
        _check_mixed_float(added, dtype='float64')
Пример #17
0
    def test_constructor_with_datetimelike(self):
        """Build Categoricals from datetime-like Series, with and without NaT."""
        # 12077
        # constructor with a datetimelike and NaT
        datetimelikes = [
            date_range('1995-01-01 00:00:00', periods=5, freq='s'),
            date_range('1995-01-01 00:00:00', periods=5,
                       freq='s', tz='US/Eastern'),
            timedelta_range('1 day', periods=5, freq='s'),
        ]

        for dtl in datetimelikes:
            index_type = type(dtl)

            ser = Series(dtl)
            cat = Categorical(ser)
            expected = index_type(ser)
            expected.freq = None
            tm.assert_index_equal(cat.categories, expected)
            tm.assert_numpy_array_equal(cat.codes,
                                        np.arange(5, dtype='int8'))

            # with NaT: the last value is missing and gets code -1
            with_nat = ser.copy()
            with_nat.iloc[-1] = NaT
            cat = Categorical(with_nat)
            expected = index_type(with_nat.dropna())
            expected.freq = None
            tm.assert_index_equal(cat.categories, expected)

            expected_codes = np.array([0, 1, 2, 3, -1], dtype=np.int8)
            tm.assert_numpy_array_equal(cat.codes, expected_codes)

            assert 'NaT' in repr(cat)
Пример #18
0
    def test_frame_to_period(self):
        """Check ``DataFrame.to_period`` along both axes and on a bad axis."""
        from pandas.tseries.period import period_range

        ncols = 5
        dates = date_range('1/1/2000', '1/1/2001')
        periods = period_range('1/1/2000', '1/1/2001')
        frame = DataFrame(randn(len(dates), ncols), index=dates)
        frame['mix'] = 'a'

        # conversion of the row index
        converted = frame.to_period()
        expected = frame.copy()
        expected.index = periods
        assert_frame_equal(converted, expected)

        converted = frame.to_period('M')
        tm.assert_index_equal(converted.index, expected.index.asfreq('M'))

        # conversion of the column index, on the transposed frame
        frame = frame.T
        converted = frame.to_period(axis=1)
        expected = frame.copy()
        expected.columns = periods
        assert_frame_equal(converted, expected)

        converted = frame.to_period('M', axis=1)
        tm.assert_index_equal(converted.columns,
                              expected.columns.asfreq('M'))

        self.assertRaises(ValueError, frame.to_period, axis=2)
Пример #19
0
    def test_value_counts_inferred(self):
        """Check value_counts / unique / nunique on string Index and Series."""
        for klass in (Index, Series):
            raw = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            obj = klass(raw)
            by_count = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(obj.value_counts(), by_count)

            # unique() gives an Index for an Index, otherwise an ndarray
            if not isinstance(obj, Index):
                uniques = np.unique(np.array(raw, dtype=np.object_))
                tm.assert_numpy_array_equal(obj.unique(), uniques)
            else:
                uniques = Index(np.unique(np.array(raw, dtype=np.object_)))
                tm.assert_index_equal(obj.unique(), uniques)

            assert obj.nunique() == 4
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            unsorted = obj.value_counts(sort=False).sort_values()
            wanted = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(unsorted, wanted)

            # sort ascending
            ascending = obj.value_counts(ascending=True)
            wanted = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(ascending, wanted)

            # relative histogram.
            relative = obj.value_counts(normalize=True)
            wanted = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(relative, wanted)
Пример #20
0
    def test_delete(self):
        """Delete single positions from a daily TimedeltaIndex and check freq."""
        idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')

        # removing the first or last element is expected to preserve freq
        without_first = timedelta_range(start='2 Days', periods=4, freq='D',
                                        name='idx')
        without_last = timedelta_range(start='1 Days', periods=4, freq='D',
                                       name='idx')

        # removing an interior element is expected to reset freq to None
        without_second = TimedeltaIndex(
            ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx')

        cases = {0: without_first,
                 -5: without_first,
                 -1: without_last,
                 4: without_last,
                 1: without_second}
        for position, expected in compat.iteritems(cases):
            result = idx.delete(position)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # out of bounds: either error, depending on the numpy version
        with pytest.raises((IndexError, ValueError)):
            idx.delete(5)
Пример #21
0
    def test_delete_slice(self):
        """Delete runs of positions, given as a tuple and as a slice."""
        idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx')

        def check(result, expected):
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # removing a run at either end is expected to preserve freq
        without_head = timedelta_range(start='4 days', periods=7, freq='D',
                                       name='idx')
        without_tail = timedelta_range(start='1 days', periods=7, freq='D',
                                       name='idx')

        # removing an interior run is expected to reset freq to None
        without_middle = TimedeltaIndex(['1 d', '2 d', '3 d',
                                         '7 d', '8 d', '9 d', '10d'],
                                        freq=None, name='idx')

        cases = {(0, 1, 2): without_head,
                 (7, 8, 9): without_tail,
                 (3, 4, 5): without_middle}
        for positions, expected in compat.iteritems(cases):
            check(idx.delete(positions), expected)

            # the same run expressed as a slice
            as_slice = slice(positions[0], positions[-1] + 1)
            check(idx.delete(as_slice), expected)
Пример #22
0
    def test_mul_index(self):
        """Check ``idx * idx == idx ** 2`` for every index except RangeIndex."""
        idx = self.create_index()

        # in general not true for RangeIndex
        if isinstance(idx, RangeIndex):
            return

        squared = idx * idx
        tm.assert_index_equal(squared, idx ** 2)
Пример #23
0
 def test_CalendarDay_range_with_dst_crossing(self):
     """Compare a '7CD' range with a 7-day DateOffset range in Europe/Paris."""
     # GH 20596
     start, end, zone = '2018-10-23', '2018-11-06', 'Europe/Paris'
     calendar_days = date_range(start, end, freq='7CD', tz=zone)
     offset_days = date_range(start, end,
                              freq=pd.DateOffset(days=7), tz=zone)
     tm.assert_index_equal(calendar_days, offset_days)
Пример #24
0
def test_union_store_indexes(populate_store):
    """Check ``utils.union_store_indexes`` against the fixture's index.

    ``populate_store`` is read for the keys ``'name'`` (path of the HDF
    file) and ``'index'`` (the expected union).

    The store is closed and the file removed even when the assertion
    fails, so a failing run does not leave an open handle or a stray
    file behind.
    """
    path = populate_store['name']
    try:
        # HDFStore is a context manager: the handle is closed on exit,
        # whether or not the body raised.
        with pandas.HDFStore(path, mode='r+') as store:
            index = populate_store['index']
            union = utils.union_store_indexes(store)
            assert_index_equal(index, union)
    finally:
        os.remove(path)
Пример #25
0
def test_concat_unions_categoricals():
    """Compare ``_concat`` with ``pd.concat`` on the module-level frame lists.

    Each case is labelled by what it concatenates and by the kind of
    index involved.
    """
    # Categorical DataFrame, regular index
    tm.assert_frame_equal(_concat(frames), pd.concat(frames2))

    # Categorical Series, regular index
    ours = _concat([frame.y for frame in frames])
    theirs = pd.concat([frame.y for frame in frames2])
    tm.assert_series_equal(ours, theirs)

    # Categorical Index
    ours = _concat([frame.index for frame in frames3])
    theirs = pd.concat([frame for frame in frames4]).index
    tm.assert_index_equal(ours, theirs)

    # Categorical DataFrame, Categorical Index
    tm.assert_frame_equal(_concat(frames3), pd.concat(frames4))

    # Non-categorical DataFrame, Categorical Index
    ours = _concat([frame[['x', 'z']] for frame in frames3])
    theirs = pd.concat([frame[['x', 'z']] for frame in frames4])
    tm.assert_frame_equal(ours, theirs)

    # Categorical Series, Categorical Index
    ours = _concat([frame.z for frame in frames3])
    theirs = pd.concat([frame.z for frame in frames4])
    tm.assert_series_equal(ours, theirs)

    # Non-categorical Series, Categorical Index
    ours = _concat([frame.x for frame in frames3])
    theirs = pd.concat([frame.x for frame in frames4])
    tm.assert_series_equal(ours, theirs)

    # MultiIndex with Categorical Index
    ours = _concat([frame.index for frame in frames5])
    theirs = pd.concat([frame for frame in frames6]).index
    tm.assert_index_equal(ours, theirs)

    # DataFrame, MultiIndex with CategoricalIndex
    tm.assert_frame_equal(_concat(frames5), pd.concat(frames6))
Пример #26
0
def test_constructor_single_level():
    """Build a one-level MultiIndex and check its level and names."""
    values = ['foo', 'bar', 'baz', 'qux']
    mi = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
                    labels=[[0, 1, 2, 3]], names=['first'])
    assert isinstance(mi, MultiIndex)
    tm.assert_index_equal(mi.levels[0], Index(values, name='first'))
    assert mi.names == ['first']
Пример #27
0
    def test_astype_object2(self):
        idx = pd.period_range(start='2013-01-01', periods=4, freq='M',
                              name='idx')
        expected_list = [pd.Period('2013-01-31', freq='M'),
                         pd.Period('2013-02-28', freq='M'),
                         pd.Period('2013-03-31', freq='M'),
                         pd.Period('2013-04-30', freq='M')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list

        idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT',
                           '2013-01-04'], freq='D', name='idx')
        expected_list = [pd.Period('2013-01-01', freq='D'),
                         pd.Period('2013-01-02', freq='D'),
                         pd.Period('NaT', freq='D'),
                         pd.Period('2013-01-04', freq='D')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        for i in [0, 1, 3]:
            assert result[i] == expected[i]
        assert result[2] is pd.NaT
        assert result.name == expected.name

        result_list = idx.tolist()
        for i in [0, 1, 3]:
            assert result_list[i] == expected_list[i]
        assert result_list[2] is pd.NaT
Пример #28
0
    def test_categorical_order(self):
        """Compare the Stata files at ``self.dta19_115`` / ``self.dta19_117``
        with a frame built directly from known codes and labels.
        """
        # Directly construct using expected codes
        # Format is is_cat, col_name, labels (in order), underlying data
        expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
                    (True, 'reverse', ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]),
                    (True, 'noorder', ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])),
                    (True, 'floating', ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
                    (True, 'float_missing', ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
                    (False, 'nolabel', [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
                    (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5))]
        cols = []
        for is_cat, col, labels, codes in expected:
            if is_cat:
                cols.append((col, pd.Categorical.from_codes(codes, labels)))
            else:
                # Non-categorical column: the "labels" slot holds the
                # float values themselves and the codes are not used.
                cols.append((col, pd.Series(labels, dtype=np.float32)))
        # From here on ``expected`` is the frame, not the list of tuples.
        expected = DataFrame.from_items(cols)

        # Read with and with out categoricals, ensure order is identical
        parsed_115 = read_stata(self.dta19_115)
        parsed_117 = read_stata(self.dta19_117)
        tm.assert_frame_equal(expected, parsed_115)
        tm.assert_frame_equal(expected, parsed_117)

        # Check identity of codes
        # NOTE(review): only parsed_115 is compared in this loop;
        # parsed_117 is covered by the frame comparison above only.
        for col in expected:
            if is_categorical_dtype(expected[col]):
                tm.assert_series_equal(expected[col].cat.codes,
                                       parsed_115[col].cat.codes)
                tm.assert_index_equal(expected[col].cat.categories,
                                      parsed_115[col].cat.categories)
Пример #29
0
    def test_ensure_copied_data(self):
        """Rebuild each index in ``self.indices`` from its own values with
        ``copy=True`` and ``copy=False`` and check whether data is shared.
        """
        # Check the "copy" argument of each Index.__new__ is honoured
        # GH12309
        for name, index in compat.iteritems(self.indices):
            init_kwargs = {}
            if isinstance(index, PeriodIndex):
                # Needs "freq" specification:
                init_kwargs['freq'] = index.freq
            elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
                # RangeIndex cannot be initialized from data
                # MultiIndex and CategoricalIndex are tested separately
                continue

            index_type = index.__class__
            # copy=True: equal index, but the values must be a copy.
            result = index_type(index.values, copy=True, **init_kwargs)
            tm.assert_index_equal(index, result)
            tm.assert_numpy_array_equal(index.values, result.values,
                                        check_same='copy')

            if not isinstance(index, PeriodIndex):
                # copy=False: both .values and ._values must be the same
                # data as the original.
                result = index_type(index.values, copy=False, **init_kwargs)
                tm.assert_numpy_array_equal(index.values, result.values,
                                            check_same='same')
                tm.assert_numpy_array_equal(index._values, result._values,
                                            check_same='same')
            else:
                # .values an object array of Period, thus copied
                # Built from the ordinals (asi8) instead, and only
                # ._values is compared.
                result = index_type(ordinal=index.asi8, copy=False,
                                    **init_kwargs)
                tm.assert_numpy_array_equal(index._values, result._values,
                                            check_same='same')
Пример #30
0
    def test_rpow_float(self):
        """Check that ``2.0 ** index`` matches the power taken on its values."""
        # test power calculations both ways, GH 14973
        index = self.create_index()

        from_values = pd.Float64Index(2.0 ** index.values)
        from_index = 2.0 ** index
        tm.assert_index_equal(from_index, from_values)
Пример #31
0
    def test_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert dti.day_name(locale=time_locale)[day] == name
            ts = Timestamp(datetime(2016, 4, day))
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(pd.NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = pd.date_range(freq="M", start="2012", end="2013")
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes
        # https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for date, expected in zip(dti, expected_months):
            result = date.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", result)

            assert result == expected
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])
Пример #32
0
def test_from_product_empty_one_level():
    """Check the single level of a MultiIndex made from one empty iterable."""
    mi = MultiIndex.from_product([[]], names=['A'])
    tm.assert_index_equal(mi.levels[0], pd.Index([], name='A'))
Пример #33
0
def test_from_tuples_empty():
    """An empty ``from_tuples`` should equal ``from_arrays`` of empty lists."""
    # GH 16777
    names = ['a', 'b']
    from_tuples = MultiIndex.from_tuples([], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_tuples, from_arrays)
Пример #34
0
def test_from_arrays_index_series_categorical():
    """Build a MultiIndex from categorical indexes, Series and raw values.

    In every case the level values are expected to equal the original
    CategoricalIndex objects.
    """
    # GH13743
    unordered = pd.CategoricalIndex(list("abcaab"),
                                    categories=list("bac"),
                                    ordered=False)
    ordered = pd.CategoricalIndex(list("abcaab"),
                                  categories=list("bac"),
                                  ordered=True)

    def check(arrays):
        mi = pd.MultiIndex.from_arrays(arrays)
        tm.assert_index_equal(mi.get_level_values(0), unordered)
        tm.assert_index_equal(mi.get_level_values(1), ordered)

    check([unordered, ordered])
    check([pd.Series(unordered), pd.Series(ordered)])
    check([unordered.values, ordered.values])
Пример #35
0
def test_from_arrays_index_datetimelike_mixed():
    """Mixed datetime-like levels must survive ``from_arrays`` unchanged.

    Four levels are used: a tz-aware date range, a naive date range, a
    timedelta range and a period range.  They are passed once as indexes and
    once wrapped in ``Series``; both results must reproduce each level and
    must equal each other.
    """
    idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
    idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
    idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
    idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
    levels = [idx1, idx2, idx3, idx4]

    result = pd.MultiIndex.from_arrays(levels)
    for position, level in enumerate(levels):
        tm.assert_index_equal(result.get_level_values(position), level)

    as_series = [pd.Series(level) for level in levels]
    result2 = pd.MultiIndex.from_arrays(as_series)
    for position, level in enumerate(levels):
        tm.assert_index_equal(result2.get_level_values(position), level)

    tm.assert_index_equal(result, result2)
Пример #36
0
    def test_dups_fancy_indexing(self):
        """Exercise label selection on frames with duplicated labels.

        Covers column selection when column names repeat, ``.loc`` with list
        and ``Index`` selectors on a non-unique index, and the ``KeyError``
        expected whenever a requested label is absent.
        """
        missing_msg = "with any missing labels"

        # GH 3455
        frame = tm.makeCustomDataframe(10, 3)
        frame.columns = ["a", "a", "b"]
        picked_columns = frame[["b", "a"]].columns
        tm.assert_index_equal(picked_columns, Index(["b", "a", "a"]))

        # across dtypes: build the same frame two ways and compare
        df = DataFrame(
            [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")
        )
        # smoke-check that head/repr work with fully duplicated columns
        df.head()
        str(df)
        result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
        result.columns = list("aaaaaaa")

        # TODO(wesm): unused?
        df_v = df.iloc[:, 4]  # noqa
        res_v = result.iloc[:, 4]  # noqa

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        dup_index = ["A", "A", "B", "C"]
        df = DataFrame(
            {
                "test": [5, 7, 9, 11],
                "test1": [4.0, 5, 6, 7],
                "other": list("abcd"),
            },
            index=dup_index,
        )
        rows = ["C", "B"]
        expected = DataFrame(
            {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]},
            index=rows,
        )
        tm.assert_frame_equal(df.loc[rows], expected)
        tm.assert_frame_equal(df.loc[Index(rows)], expected)

        # a missing label anywhere in the selector raises; the second
        # selector is from GH5553 (make sure we use the right indexer)
        for rows in (["C", "B", "E"], ["F", "G", "H", "C", "B", "E"]):
            with pytest.raises(KeyError, match=missing_msg):
                df.loc[rows]

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
        none_found = re.escape(
            "\"None of [Index(['E'], dtype='object')] are in the [index]\""
        )
        with pytest.raises(KeyError, match=none_found):
            dfnu.loc[["E"]]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        for column in ([0, 1, 2], list("abc")):
            df = DataFrame({"A": column})
            with pytest.raises(KeyError, match=missing_msg):
                df.loc[[0, 8, 0]]

        # non unique with non unique selector
        df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
        with pytest.raises(KeyError, match=missing_msg):
            df.loc[["A", "A", "E"]]
Пример #37
0
    def test_nanosecond_field(self):
        dti = DatetimeIndex(np.arange(10))

        tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64)))
Пример #38
0
    def test_datetimeindex_accessors(self):
        """Check field and boolean accessors of ``DatetimeIndex``/``Timestamp``.

        A daily 365-period range starting 1998-01-01 is checked in naive and
        tz-aware form: individual field values, boolean start/end flags,
        accessor lengths, return types and name propagation, and boolean
        indexing.  Further sections cover a ``BQ-FEB`` range, the
        ``ValueError`` for custom business days, scalar ``Timestamp``
        accessors with assorted frequencies, and ``weekofyear`` near new year
        with a timezone.
        """
        start = datetime(1998, 1, 1)
        dti_naive = pd.date_range(freq="D", start=start, periods=365)
        # GH#13303
        dti_tz = pd.date_range(
            freq="D", start=start, periods=365, tz="US/Eastern"
        )

        # (accessor, position, expected value)
        field_checks = [
            ("year", 0, 1998),
            ("month", 0, 1),
            ("day", 0, 1),
            ("hour", 0, 0),
            ("minute", 0, 0),
            ("second", 0, 0),
            ("microsecond", 0, 0),
            ("dayofweek", 0, 3),
            ("dayofyear", 0, 1),
            ("dayofyear", 120, 121),
            ("weekofyear", 0, 1),
            ("weekofyear", 120, 18),
            ("quarter", 0, 1),
            ("quarter", 120, 2),
            ("days_in_month", 0, 31),
            ("days_in_month", 90, 30),
        ]
        # (accessor, position) pairs whose flag must be truthy
        flags_set = [
            ("is_month_start", 0),
            ("is_month_start", 31),
            ("is_quarter_start", 0),
            ("is_quarter_start", 90),
            ("is_year_start", 0),
            ("is_month_end", 30),
            ("is_month_end", 364),
            ("is_quarter_end", 89),
            ("is_quarter_end", 364),
            ("is_year_end", 364),
        ]
        # (accessor, position) pairs whose flag must be falsy
        flags_unset = [
            ("is_month_start", 1),
            ("is_year_start", 364),
            ("is_month_end", 0),
            ("is_month_end", 31),
            ("is_quarter_end", 0),
            ("is_quarter_end", 30),
            ("is_year_end", 0),
        ]
        # accessors whose result must have one entry per period
        full_length = [
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "microsecond",
            "dayofweek",
            "dayofyear",
            "weekofyear",
            "quarter",
            "is_month_start",
            "is_month_end",
            "is_quarter_start",
            "is_quarter_end",
            "is_year_start",
            "is_year_end",
        ]

        for dti in (dti_naive, dti_tz):
            for accessor, position, value in field_checks:
                assert getattr(dti, accessor)[position] == value

            for accessor, position in flags_set:
                assert getattr(dti, accessor)[position]
            for accessor, position in flags_unset:
                assert not getattr(dti, accessor)[position]

            for accessor in full_length:
                assert len(getattr(dti, accessor)) == 365

            dti.name = "name"

            # non boolean accessors -> return Index
            for accessor in DatetimeIndex._field_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, Index)
                assert res.name == "name"

            # boolean accessors -> return array
            for accessor in DatetimeIndex._bool_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, np.ndarray)

            # test boolean indexing
            quarter_starts = dti[dti.is_quarter_start]
            tm.assert_index_equal(quarter_starts, dti[[0, 90, 181, 273]])
            leap_days = dti[dti.is_leap_year]
            no_days = DatetimeIndex([], freq="D", tz=dti.tz, name="name")
            tm.assert_index_equal(leap_days, no_days)

        dti = pd.date_range(freq="BQ-FEB", start=start, periods=4)

        for accessor, total in [
            ("is_quarter_start", 0),
            ("is_quarter_end", 4),
            ("is_year_start", 0),
            ("is_year_end", 1),
        ]:
            assert sum(getattr(dti, accessor)) == total

        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start

        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])

        assert dti.is_month_start[0] == 1

        # (date, freq, accessor, expected value) for scalar Timestamps
        with_freq = [
            ("2013-06-01", "M", "is_month_start", 1),
            ("2013-06-01", "BM", "is_month_start", 0),
            ("2013-06-03", "M", "is_month_start", 0),
            ("2013-06-03", "BM", "is_month_start", 1),
            ("2013-02-28", "Q-FEB", "is_month_end", 1),
            ("2013-02-28", "Q-FEB", "is_quarter_end", 1),
            ("2013-02-28", "Q-FEB", "is_year_end", 1),
            ("2013-03-01", "Q-FEB", "is_month_start", 1),
            ("2013-03-01", "Q-FEB", "is_quarter_start", 1),
            ("2013-03-01", "Q-FEB", "is_year_start", 1),
            ("2013-03-31", "QS-FEB", "is_month_end", 1),
            ("2013-03-31", "QS-FEB", "is_quarter_end", 0),
            ("2013-03-31", "QS-FEB", "is_year_end", 0),
            ("2013-02-01", "QS-FEB", "is_month_start", 1),
            ("2013-02-01", "QS-FEB", "is_quarter_start", 1),
            ("2013-02-01", "QS-FEB", "is_year_start", 1),
            ("2013-06-30", "BQ", "is_month_end", 0),
            ("2013-06-30", "BQ", "is_quarter_end", 0),
            ("2013-06-30", "BQ", "is_year_end", 0),
            ("2013-06-28", "BQ", "is_month_end", 1),
            ("2013-06-28", "BQ", "is_quarter_end", 1),
            ("2013-06-28", "BQ", "is_year_end", 0),
            ("2013-06-30", "BQS-APR", "is_month_end", 0),
            ("2013-06-30", "BQS-APR", "is_quarter_end", 0),
            ("2013-06-30", "BQS-APR", "is_year_end", 0),
            ("2013-06-28", "BQS-APR", "is_month_end", 1),
            ("2013-06-28", "BQS-APR", "is_quarter_end", 1),
            ("2013-03-29", "BQS-APR", "is_year_end", 1),
            ("2013-11-01", "AS-NOV", "is_year_start", 1),
            ("2013-10-31", "AS-NOV", "is_year_end", 1),
        ]
        tests = [
            (getattr(Timestamp(date, freq=freq), accessor), value)
            for date, freq, accessor, value in with_freq
        ]
        tests.append((Timestamp("2012-02-01").days_in_month, 29))
        tests.append((Timestamp("2013-02-01").days_in_month, 28))

        for observed, value in tests:
            assert observed == value

        # GH 6538: Check that DatetimeIndex and its TimeStamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = DatetimeIndex(
            ["2013/12/29", "2013/12/30", "2013/12/31"], tz="Europe/Brussels"
        )
        expected = [52, 1, 1]
        assert dates.weekofyear.tolist() == expected
        assert [d.weekofyear for d in dates] == expected
Пример #39
0
    def test_duplicated_drop_duplicates_index(self):
        """Check ``duplicated``/``drop_duplicates`` for every object in ``self.objs``.

        Each fixture is first checked as-is (no duplicates expected), then
        extended with copies of the elements at positions 5 and 3 and checked
        with the default ``keep``, ``keep='last'``, the deprecated
        ``take_last=True`` and ``keep=False``.  ``Index`` fixtures take the
        first branch; everything else is handled with ``Series`` helpers.
        """
        # GH 4060
        for original in self.objs:
            if isinstance(original, Index):

                # special case: a boolean index is only checked for the
                # de-duplicated result and then skipped
                if original.is_boolean():
                    result = original.drop_duplicates()
                    expected = Index([False, True], name='a')
                    tm.assert_index_equal(result, expected)
                    continue

                # original doesn't have duplicates
                expected = np.array([False] * len(original), dtype=bool)
                duplicated = original.duplicated()
                tm.assert_numpy_array_equal(duplicated, expected)
                self.assertTrue(duplicated.dtype == bool)
                result = original.drop_duplicates()
                tm.assert_index_equal(result, original)
                # drop_duplicates must hand back a new object
                self.assertFalse(result is original)

                # has_duplicates
                self.assertFalse(original.has_duplicates)

                # create repeated values: append copies of the elements at
                # positions 5 and 3, so only the two appended entries are
                # flagged with the default keep
                idx = original[list(range(len(original))) + [5, 3]]
                expected = np.array([False] * len(original) + [True, True],
                                    dtype=bool)
                duplicated = idx.duplicated()
                tm.assert_numpy_array_equal(duplicated, expected)
                self.assertTrue(duplicated.dtype == bool)
                tm.assert_index_equal(idx.drop_duplicates(), original)

                # keep='last': the earlier occurrences (positions 3 and 5)
                # are the ones flagged
                base = [False] * len(idx)
                base[3] = True
                base[5] = True
                expected = np.array(base)

                duplicated = idx.duplicated(keep='last')
                tm.assert_numpy_array_equal(duplicated, expected)
                self.assertTrue(duplicated.dtype == bool)
                result = idx.drop_duplicates(keep='last')
                tm.assert_index_equal(result, idx[~expected])

                # deprecate take_last: same expectation as keep='last', plus
                # a FutureWarning
                with tm.assert_produces_warning(FutureWarning):
                    duplicated = idx.duplicated(take_last=True)
                tm.assert_numpy_array_equal(duplicated, expected)
                self.assertTrue(duplicated.dtype == bool)
                with tm.assert_produces_warning(FutureWarning):
                    result = idx.drop_duplicates(take_last=True)
                tm.assert_index_equal(result, idx[~expected])

                # keep=False: both the earlier and the appended occurrences
                # are flagged
                base = [False] * len(original) + [True, True]
                base[3] = True
                base[5] = True
                expected = np.array(base)

                duplicated = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(duplicated, expected)
                self.assertTrue(duplicated.dtype == bool)
                result = idx.drop_duplicates(keep=False)
                tm.assert_index_equal(result, idx[~expected])

                # inplace is expected to be rejected as an unknown keyword
                with tm.assertRaisesRegexp(
                        TypeError, r"drop_duplicates\(\) got an unexpected "
                        "keyword argument"):
                    idx.drop_duplicates(inplace=True)

            else:
                # non-Index fixtures are compared with the Series helpers
                expected = Series([False] * len(original),
                                  index=original.index,
                                  name='a')
                tm.assert_series_equal(original.duplicated(), expected)
                result = original.drop_duplicates()
                tm.assert_series_equal(result, original)
                self.assertFalse(result is original)

                # append copies of the entries at positions 5 and 3 to both
                # the index and the values
                idx = original.index[list(range(len(original))) + [5, 3]]
                values = original._values[list(range(len(original))) + [5, 3]]
                s = Series(values, index=idx, name='a')

                expected = Series([False] * len(original) + [True, True],
                                  index=idx,
                                  name='a')
                tm.assert_series_equal(s.duplicated(), expected)
                tm.assert_series_equal(s.drop_duplicates(), original)

                # keep='last': the earlier occurrences are flagged
                base = [False] * len(idx)
                base[3] = True
                base[5] = True
                expected = Series(base, index=idx, name='a')

                tm.assert_series_equal(s.duplicated(keep='last'), expected)
                tm.assert_series_equal(s.drop_duplicates(keep='last'),
                                       s[~np.array(base)])

                # deprecate take_last
                with tm.assert_produces_warning(FutureWarning):
                    tm.assert_series_equal(s.duplicated(take_last=True),
                                           expected)
                with tm.assert_produces_warning(FutureWarning):
                    tm.assert_series_equal(s.drop_duplicates(take_last=True),
                                           s[~np.array(base)])
                # keep=False: every occurrence of a repeated value is flagged
                base = [False] * len(original) + [True, True]
                base[3] = True
                base[5] = True
                expected = Series(base, index=idx, name='a')

                tm.assert_series_equal(s.duplicated(keep=False), expected)
                tm.assert_series_equal(s.drop_duplicates(keep=False),
                                       s[~np.array(base)])

                # in-place de-duplication must restore the original series
                s.drop_duplicates(inplace=True)
                tm.assert_series_equal(s, original)
Пример #40
0
    def test_range_edges(self):
        # GH#13672
        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:00.000000001"),
            end=Timestamp("1970-01-01 00:00:00.000000004"),
            freq="N",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 00:00:00.000000001",
                "1970-01-01 00:00:00.000000002",
                "1970-01-01 00:00:00.000000003",
                "1970-01-01 00:00:00.000000004",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:00.000000004"),
            end=Timestamp("1970-01-01 00:00:00.000000001"),
            freq="N",
        )
        exp = DatetimeIndex([])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:00.000000001"),
            end=Timestamp("1970-01-01 00:00:00.000000001"),
            freq="N",
        )
        exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"])
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:00.000001"),
            end=Timestamp("1970-01-01 00:00:00.000004"),
            freq="U",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 00:00:00.000001",
                "1970-01-01 00:00:00.000002",
                "1970-01-01 00:00:00.000003",
                "1970-01-01 00:00:00.000004",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:00.001"),
            end=Timestamp("1970-01-01 00:00:00.004"),
            freq="L",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 00:00:00.001",
                "1970-01-01 00:00:00.002",
                "1970-01-01 00:00:00.003",
                "1970-01-01 00:00:00.004",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:00:01"),
            end=Timestamp("1970-01-01 00:00:04"),
            freq="S",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 00:00:01",
                "1970-01-01 00:00:02",
                "1970-01-01 00:00:03",
                "1970-01-01 00:00:04",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 00:01"),
            end=Timestamp("1970-01-01 00:04"),
            freq="T",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 00:01",
                "1970-01-01 00:02",
                "1970-01-01 00:03",
                "1970-01-01 00:04",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01 01:00"),
            end=Timestamp("1970-01-01 04:00"),
            freq="H",
        )
        exp = DatetimeIndex(
            [
                "1970-01-01 01:00",
                "1970-01-01 02:00",
                "1970-01-01 03:00",
                "1970-01-01 04:00",
            ]
        )
        tm.assert_index_equal(idx, exp)

        idx = pd.date_range(
            start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D"
        )
        exp = DatetimeIndex(["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"])
        tm.assert_index_equal(idx, exp)
Пример #41
0
    def test_value_counts_bins(self):
        """Check ``value_counts`` with ``bins`` plus ``unique``/``nunique``.

        For both ``Index`` and ``Series`` containers: binning string data
        must raise ``TypeError``; binned counts (raw and normalized) on a
        small integer ``Series`` are compared with fixed expectations; NaN
        entries are excluded from the counts but kept by ``unique``; and an
        empty container yields empty results.
        """
        for klass in (Index, Series):
            letters = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(letters)

            # bins
            self.assertRaises(TypeError, s.value_counts, bins=1)

            s1 = Series([1, 1, 2, 3])
            tm.assert_series_equal(s1.value_counts(bins=1),
                                   Series({0.998: 4}))
            tm.assert_series_equal(s1.value_counts(bins=1, normalize=True),
                                   Series({0.998: 1.0}))

            # NOTE(review): s1 is always built with Series above, so the
            # Index branch below never runs -- confirm whether klass was
            # meant to be used for s1.
            if not isinstance(s1, Index):
                ints = np.array([1, 2, 3], dtype=np.int64)
                tm.assert_numpy_array_equal(s1.unique(), ints)
            else:
                tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))

            self.assertEqual(s1.nunique(), 3)

            counts4 = {0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1}
            res4 = s1.value_counts(bins=4)
            exp4 = Series(counts4, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)

            shares4 = {0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25}
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series(shares4, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            with_nan = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(with_nan)
            tm.assert_series_equal(s.value_counts(),
                                   Series([4, 3, 2], index=['b', 'a', 'd']))

            if not isinstance(s, Index):
                exp = np.array(['a', 'b', np.nan, 'd'], dtype=object)
                tm.assert_numpy_array_equal(s.unique(), exp)
            else:
                exp = Index(['a', 'b', np.nan, 'd'])
                tm.assert_index_equal(s.unique(), exp)
            self.assertEqual(s.nunique(), 3)

            s = klass({})
            no_counts = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(),
                                   no_counts,
                                   check_index_type=False)
            # returned dtype differs depending on original
            if not isinstance(s, Index):
                self.assert_numpy_array_equal(s.unique(),
                                              np.array([]),
                                              check_dtype=False)
            else:
                self.assert_index_equal(s.unique(), Index([]), exact=False)

            self.assertEqual(s.nunique(), 0)
Пример #42
0
 def test_calculated_against_csv(self):
     assert_index_equal(self.calendar.schedule.index, self.answers.index)
Пример #43
0
 def test_transpose(self):
     for obj in self.objs:
         if isinstance(obj, Index):
             tm.assert_index_equal(obj.transpose(), obj)
         else:
             tm.assert_series_equal(obj.transpose(), obj)
Пример #44
0
    def test_value_counts_datetime64(self):
        """Check ``value_counts``/``unique``/``nunique`` on datetime-like data.

        For both ``Index`` and ``Series`` containers the test parses a small
        fixed-width table, then checks counts and unique values for the
        parsed dates, for the same dates with an extra ``NaT``, and for
        timedelta values derived from the date column.
        """
        klasses = [Index, Series]
        for klass in klasses:
            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            # columns are split by fixed widths 6/8/3 and "dt" is parsed as
            # dates
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # NOTE(review): the third timestamp ends in 'X' while the others
            # end in 'Z' -- confirm whether this is intentional.
            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00X'
            ])
            # expected counts: three rows for 2010-01-01, two for 2008-09-09,
            # one for 2009-01-01
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            # unique values are expected in order of first appearance
            expected = np_array_datetime64_compat([
                '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                '2008-09-09 00:00:00Z'
            ],
                                                  dtype='datetime64[ns]')
            if isinstance(s, Index):
                tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
            else:
                tm.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT])

            # by default NaT is not counted, so expected_s is unchanged
            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # with dropna=False NaT gets its own entry; expected_s is
            # extended in place and is rebound further down
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            if isinstance(s, Index):
                exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
                tm.assert_index_equal(unique, exp_idx)
            else:
                tm.assert_numpy_array_equal(unique[:3], expected)
                self.assertTrue(pd.isnull(unique[3]))

            # nunique ignores NaT unless dropna=False is passed
            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            # every row becomes the same one-day timedelta
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'], name='dt')
            if isinstance(td, Index):
                tm.assert_index_equal(td.unique(), expected)
            else:
                tm.assert_numpy_array_equal(td.unique(), expected.values)

            # same values built with the operands in the other order
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
Пример #45
0
 def test_intersection_bug(self):
     # GH #771
     a = bdate_range('11/30/2011', '12/31/2011', freq='C')
     b = bdate_range('12/10/2011', '12/20/2011', freq='C')
     result = a.intersection(b)
     tm.assert_index_equal(result, b)
Пример #46
0
    def test_value_counts_unique_nunique_null(self):
        """Check ``value_counts``/``unique``/``nunique`` when nulls are present.

        For each null marker and each object in ``self.objs`` that passes
        ``self._allow_na_ops``, the first two values are replaced by a null,
        the n-th element is repeated n+1 times, and the resulting counts,
        unique values and ``nunique`` results are compared with fixed
        expectations.
        """

        for null_obj in [np.nan, None]:
            for orig in self.objs:
                o = orig.copy()
                klass = type(o)
                values = o._values

                # skip fixtures for which NA operations are not allowed
                if not self._allow_na_ops(o):
                    continue

                # special assign to the numpy array
                # (datetime-like data gets iNaT, everything else null_obj)
                if is_datetimetz(o):
                    if isinstance(o, DatetimeIndex):
                        v = o.asi8
                        v[0:2] = pd.tslib.iNaT
                        values = o._shallow_copy(v)
                    else:
                        o = o.copy()
                        o[0:2] = pd.tslib.iNaT
                        values = o._values

                elif needs_i8_conversion(o):
                    values[0:2] = pd.tslib.iNaT
                    values = o._shallow_copy(values)
                else:
                    values[0:2] = null_obj
                # check values has the same dtype as the original

                self.assertEqual(values.dtype, o.dtype)

                # create repeated values, 'n'th element is repeated by n+1
                # times
                if isinstance(o, (DatetimeIndex, PeriodIndex)):
                    expected_index = o.copy()
                    expected_index.name = None

                    # attach name to klass
                    o = klass(values.repeat(range(1, len(o) + 1)))
                    o.name = 'a'
                else:
                    if is_datetimetz(o):
                        expected_index = orig._values._shallow_copy(values)
                    else:
                        expected_index = pd.Index(values)
                    expected_index.name = None
                    o = o.repeat(range(1, len(o) + 1))
                    o.name = 'a'

                # check values has the same dtype as the original
                self.assertEqual(o.dtype, orig.dtype)
                # check values correctly have NaN
                # (the two nulled elements are repeated 1 and 2 times, so
                # the first three entries are expected to be null)
                nanloc = np.zeros(len(o), dtype=np.bool)
                nanloc[:3] = True
                if isinstance(o, Index):
                    self.assert_numpy_array_equal(pd.isnull(o), nanloc)
                else:
                    exp = pd.Series(nanloc, o.index, name='a')
                    self.assert_series_equal(pd.isnull(o), exp)

                # expected counts with and without the null entry; the null
                # entry is expected last with a count of 3
                expected_s_na = Series(list(range(10, 2, -1)) + [3],
                                       index=expected_index[9:0:-1],
                                       dtype='int64',
                                       name='a')
                expected_s = Series(list(range(10, 2, -1)),
                                    index=expected_index[9:1:-1],
                                    dtype='int64',
                                    name='a')

                result_s_na = o.value_counts(dropna=False)
                tm.assert_series_equal(result_s_na, expected_s_na)
                self.assertTrue(result_s_na.index.name is None)
                self.assertEqual(result_s_na.name, 'a')
                result_s = o.value_counts()
                # value_counts is called again here rather than reusing
                # result_s
                tm.assert_series_equal(o.value_counts(), expected_s)
                self.assertTrue(result_s.index.name is None)
                self.assertEqual(result_s.name, 'a')

                result = o.unique()
                if isinstance(o, Index):
                    tm.assert_index_equal(result, Index(values[1:], name='a'))
                elif is_datetimetz(o):
                    # unable to compare NaT / nan
                    tm.assert_numpy_array_equal(result[1:],
                                                values[2:].asobject.values)
                    self.assertIs(result[0], pd.NaT)
                else:
                    tm.assert_numpy_array_equal(result[1:], values[2:])

                    self.assertTrue(pd.isnull(result[0]))
                    self.assertEqual(result.dtype, orig.dtype)

                # eight distinct non-null values, nine when the null is
                # counted
                self.assertEqual(o.nunique(), 8)
                self.assertEqual(o.nunique(dropna=False), 9)
Пример #47
0
    def test_slice_specialised(self):

        # scalar indexing
        res = self.index[1]
        expected = 2
        assert res == expected

        res = self.index[-1]
        expected = 18
        assert res == expected

        # slicing
        # slice value completion
        index = self.index[:]
        expected = self.index
        tm.assert_index_equal(index, expected)

        # positive slice values
        index = self.index[7:10:2]
        expected = Index(np.array([14, 18]), name='foo')
        tm.assert_index_equal(index, expected)

        # negative slice values
        index = self.index[-1:-5:-2]
        expected = Index(np.array([18, 14]), name='foo')
        tm.assert_index_equal(index, expected)

        # stop overshoot
        index = self.index[2:100:4]
        expected = Index(np.array([4, 12]), name='foo')
        tm.assert_index_equal(index, expected)

        # reverse
        index = self.index[::-1]
        expected = Index(self.index.values[::-1], name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[-8::-1]
        expected = Index(np.array([4, 2, 0]), name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[-40::-1]
        expected = Index(np.array([], dtype=np.int64), name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[40::-1]
        expected = Index(self.index.values[40::-1], name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[10::-1]
        expected = Index(self.index.values[::-1], name='foo')
        tm.assert_index_equal(index, expected)
Пример #48
0
    def test_intersection(self):
        """Check ``intersection`` on sorted daily ranges and unsorted indexes.

        The looped cases assert the resulting values, name, ``freq`` and
        ``tz``; the trailing cases only assert that the result is empty.
        """
        # GH 4690 (with tz)
        for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
            # NOTE(review): nothing in this first section receives ``tz``,
            # so it repeats unchanged for every zone - confirm intended.
            base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')

            sorted_cases = [
                # if target has the same name, it is preserved
                (date_range('5/15/2000', '6/20/2000', freq='D', name='idx'),
                 date_range('6/1/2000', '6/20/2000', freq='D', name='idx')),
                # if target name is different, it will be reset
                (date_range('5/15/2000', '6/20/2000', freq='D', name='other'),
                 date_range('6/1/2000', '6/20/2000', freq='D', name=None)),
                # target does not overlap the base range at all
                (date_range('7/1/2000', '7/31/2000', freq='D', name='idx'),
                 DatetimeIndex([], name='idx')),
            ]
            for other, wanted in sorted_cases:
                got = base.intersection(other)
                tm.assert_index_equal(got, wanted)
                assert got.name == wanted.name
                assert got.freq == wanted.freq
                assert got.tz == wanted.tz

            # non-monotonic
            base = DatetimeIndex(
                ['2011-01-05', '2011-01-04', '2011-01-02', '2011-01-03'],
                tz=tz, name='idx')
            other_dates = ['2011-01-04', '2011-01-02',
                           '2011-02-02', '2011-02-03']
            common_dates = ['2011-01-04', '2011-01-02']

            unsorted_cases = [
                (DatetimeIndex(other_dates, tz=tz, name='idx'),
                 DatetimeIndex(common_dates, tz=tz, name='idx')),
                (DatetimeIndex(other_dates, tz=tz, name='other'),
                 DatetimeIndex(common_dates, tz=tz, name=None)),
                # GH 7880
                (date_range('7/1/2000', '7/31/2000', freq='D', tz=tz,
                            name='idx'),
                 DatetimeIndex([], tz=tz, name='idx')),
            ]
            for other, wanted in unsorted_cases:
                got = base.intersection(other)
                tm.assert_index_equal(got, wanted)
                assert got.name == wanted.name
                # the unsorted base is expected to yield no frequency
                assert got.freq is None
                assert got.tz == wanted.tz

        # empty same freq GH2129
        minutely = date_range('6/1/2000', '6/15/2000', freq='T')
        assert len(minutely[0:0].intersection(minutely)) == 0
        assert len(minutely.intersection(minutely[0:0])) == 0
Пример #49
0
 def test_repr_roundtrip(self):
     """Evaluating the ``repr`` of ``self.index`` must rebuild an equal index."""
     rebuilt = eval(repr(self.index))
     tm.assert_index_equal(rebuilt, self.index)
Пример #50
0
 def test_union(self):
     """Union of two overlapping ``Int64Index`` objects of even numbers."""
     left = Int64Index(np.arange(0, 20, 2))
     right = Int64Index(np.arange(10, 30, 2))
     merged = left.union(right)
     tm.assert_index_equal(merged, Int64Index(np.arange(0, 30, 2)))
Пример #51
0
    def test_intersection(self):
        """Check ``intersection`` between ``self.index`` and other indexes.

        Covers a plain integer ``Index``, increasing and decreasing
        ``RangeIndex`` operands, and several cases whose result is empty.
        """
        idx = self.index

        def sorted_common(other):
            # reference answer computed with numpy on the raw values
            return Index(np.sort(np.intersect1d(idx.values, other.values)))

        # intersect with Int64Index, in both directions
        int_other = Index(np.arange(1, 6))
        got = idx.intersection(int_other)
        tm.assert_index_equal(got, sorted_common(int_other))

        got = int_other.intersection(idx)
        tm.assert_index_equal(got, sorted_common(int_other))

        # intersect with increasing RangeIndex
        rising = RangeIndex(1, 6)
        got = idx.intersection(rising)
        tm.assert_index_equal(got, sorted_common(rising))

        # intersect with decreasing RangeIndex
        falling = RangeIndex(5, 0, -1)
        got = idx.intersection(falling)
        wanted = sorted_common(falling)
        tm.assert_index_equal(got, wanted)

        # reversed (GH 17296)
        tm.assert_index_equal(falling.intersection(idx), wanted)

        # GH 17296: intersect two decreasing RangeIndexes
        first = RangeIndex(10, -2, -2)
        second = RangeIndex(5, -4, -1)
        wanted = first.astype(int).intersection(second.astype(int))
        tm.assert_index_equal(first.intersection(second).astype(int), wanted)

        # reversed
        tm.assert_index_equal(second.intersection(first).astype(int), wanted)

        base = RangeIndex(5)

        # intersect of non-overlapping indices
        for disjoint in [RangeIndex(5, 10, 1), RangeIndex(-1, -5, -1)]:
            got = base.intersection(disjoint)
            tm.assert_index_equal(got, RangeIndex(0, 0, 1))

        # intersection of empty indices
        nothing = RangeIndex(0, 0, 1)
        got = base.intersection(nothing)
        wanted = RangeIndex(0, 0, 1)
        tm.assert_index_equal(got, wanted)
        tm.assert_index_equal(nothing.intersection(base), wanted)

        # intersection of non-overlapping values based on start value and gcd
        odds = RangeIndex(1, 10, 2)
        fours = RangeIndex(0, 10, 4)
        got = odds.intersection(fours)
        tm.assert_index_equal(got, RangeIndex(0, 0, 1))
Пример #52
0
 def test_ufunc_compat(self):
     """``np.sin`` of a ``RangeIndex`` must equal a ``Float64Index`` of sines."""
     sines = np.sin(RangeIndex(5))
     positions = np.arange(5, dtype='int64')
     tm.assert_index_equal(sines, Float64Index(np.sin(positions)))
Пример #53
0
    def test_numeric_compat2(self):
        """Check the numeric operators overridden by ``RangeIndex``.

        All comparisons use ``exact=True`` so the class of the result is
        checked as well as its values.
        """
        # validate that we are handling the RangeIndex overrides to numeric ops
        # and returning RangeIndex where possible
        idx = RangeIndex(0, 10, 2)

        tm.assert_index_equal(idx * 2, RangeIndex(0, 20, 4), exact=True)
        tm.assert_index_equal(idx + 2, RangeIndex(2, 12, 2), exact=True)
        tm.assert_index_equal(idx - 2, RangeIndex(-2, 8, 2), exact=True)

        # truediv under PY3
        halved = idx / 2
        wanted = RangeIndex(0, 5, 1)
        if PY3:
            wanted = wanted.astype('float64')
        tm.assert_index_equal(halved, wanted, exact=True)

        # NOTE(review): the expected value below is produced by the same
        # expression as the result, so this mainly checks repeatability.
        quartered = idx / 4
        tm.assert_index_equal(quartered, RangeIndex(0, 10, 2) / 4, exact=True)

        # floor division by one leaves the index unchanged
        tm.assert_index_equal(idx // 1, idx, exact=True)

        # __mul__
        product = idx * idx
        tm.assert_index_equal(product, Index(idx.values * idx.values),
                              exact=True)

        # __pow__
        wide = RangeIndex(0, 1000, 2)
        squared = wide**2
        wanted = wide._int64index**2
        tm.assert_index_equal(Index(squared.values), wanted, exact=True)

        # __floordiv__: (dividend, divisor, expected) triples
        floordiv_cases = [
            (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)),
            (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)),
            (RangeIndex(0, 1000, 1), 2,
             RangeIndex(0, 1000, 1)._int64index // 2),
            (RangeIndex(0, 100, 1), 2.0,
             RangeIndex(0, 100, 1)._int64index // 2.0),
            (RangeIndex(0), 50, RangeIndex(0)),
            (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)),
            (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)),
            (RangeIndex(-100, -200, 3), 2, RangeIndex(0)),
        ]
        for dividend, divisor, wanted in floordiv_cases:
            tm.assert_index_equal(dividend // divisor, wanted, exact=True)
Пример #54
0
    def test_constructor(self):
        """Check ``RangeIndex`` construction from valid and invalid input."""
        # stop only
        index = RangeIndex(5)
        values = np.arange(5, dtype=np.int64)
        assert isinstance(index, RangeIndex)
        assert (index._start, index._stop, index._step) == (0, 5, 1)
        assert index.name is None
        tm.assert_index_equal(Index(values), index)

        # start and stop
        index = RangeIndex(1, 5)
        values = np.arange(1, 5, dtype=np.int64)
        assert isinstance(index, RangeIndex)
        assert index._start == 1
        tm.assert_index_equal(Index(values), index)

        # start, stop and step
        index = RangeIndex(1, 5, 2)
        values = np.arange(1, 5, 2, dtype=np.int64)
        assert isinstance(index, RangeIndex)
        assert index._step == 2
        tm.assert_index_equal(Index(values), index)

        # calling with no range arguments is rejected
        msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers"
        with tm.assert_raises_regex(TypeError, msg):
            RangeIndex()

        # every spelling of an empty range gives start=0, stop=0, step=1
        empties = [
            RangeIndex(0),
            RangeIndex(start=0),
            RangeIndex(stop=0),
            RangeIndex(0, 0)
        ]
        no_values = np.empty(0, dtype=np.int64)
        for index in empties:
            assert isinstance(index, RangeIndex)
            assert (index._start, index._stop, index._step) == (0, 0, 1)
            tm.assert_index_equal(Index(no_values), index)

        # a name alone is not enough either
        with tm.assert_raises_regex(TypeError, msg):
            RangeIndex(name='Foo')

        named_empties = [
            RangeIndex(0, name='Foo'),
            RangeIndex(start=0, name='Foo'),
            RangeIndex(stop=0, name='Foo'),
            RangeIndex(0, 0, name='Foo')
        ]
        for index in named_empties:
            assert isinstance(index, RangeIndex)
            assert index.name == 'Foo'

        # we don't allow on a bare Index
        with pytest.raises(TypeError):
            Index(0, 1000)

        # invalid args
        bad_args = [
            Index(['a', 'b']),
            Series(['a', 'b']),
            np.array(['a', 'b']), [], 'foo',
            datetime(2000, 1, 1, 0, 0),
            np.arange(0, 10),
            np.array([1]), [1]
        ]
        for bad in bad_args:
            with pytest.raises(TypeError):
                RangeIndex(bad)
Пример #55
0
    def test_combineSeries(self):
        """Check arithmetic between a DataFrame and a Series.

        Uses the ``self.frame``, ``self.mixed_float`` and ``self.tsframe``
        fixtures, which are defined outside this method.
        """

        # Series: cross-section taken at the first index label of the frame
        series = self.frame.xs(self.frame.index[0])

        added = self.frame + series

        # every column of the sum equals that column plus the matching
        # entry of the series
        for key, s in compat.iteritems(added):
            assert_series_equal(s, self.frame[key] + series[key])

        # a series carrying an extra key 'E' adds an all-NaN column 'E'
        larger_series = series.to_dict()
        larger_series['E'] = 1
        larger_series = Series(larger_series)
        larger_added = self.frame + larger_series

        for key, s in compat.iteritems(self.frame):
            assert_series_equal(larger_added[key], s + series[key])
        assert 'E' in larger_added
        assert np.isnan(larger_added['E']).all()

        # no upcast needed
        added = self.mixed_float + series
        _check_mixed_float(added)

        # vs mix (upcast) as needed
        added = self.mixed_float + series.astype('float32')
        _check_mixed_float(added, dtype=dict(C=None))
        added = self.mixed_float + series.astype('float16')
        _check_mixed_float(added, dtype=dict(C=None))

        # The checks below are disabled: they raise with numexpr because an
        # int64 is being added to a uint64 (odd compared with plain int).

        # added = self.mixed_int + (100*series).astype('int64')
        # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
        # 'int64', D = 'int64'))
        # added = self.mixed_int + (100*series).astype('int32')
        # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
        # 'int32', D = 'int64'))

        # TimeSeries: column 'A' of the time-series frame
        ts = self.tsframe['A']

        # 10890
        # we no longer allow auto timeseries broadcasting
        # and require explicit broadcasting
        added = self.tsframe.add(ts, axis='index')

        for key, col in compat.iteritems(self.tsframe):
            result = col + ts
            assert_series_equal(added[key], result, check_names=False)
            # the frame column keeps its own label ...
            assert added[key].name == key
            # ... while the series sum keeps a name only when both match
            if col.name == ts.name:
                assert result.name == 'A'
            else:
                assert result.name is None

        # dropping the last five rows of the frame: the result still
        # carries the full index of the series
        smaller_frame = self.tsframe[:-5]
        smaller_added = smaller_frame.add(ts, axis='index')

        tm.assert_index_equal(smaller_added.index, self.tsframe.index)

        # shortening the series instead gives the same frame
        smaller_ts = ts[:-5]
        smaller_added2 = self.tsframe.add(smaller_ts, axis='index')
        assert_frame_equal(smaller_added, smaller_added2)

        # length 0, result is all-nan
        result = self.tsframe.add(ts[:0], axis='index')
        expected = DataFrame(np.nan,
                             index=self.tsframe.index,
                             columns=self.tsframe.columns)
        assert_frame_equal(result, expected)

        # Frame is all-nan
        result = self.tsframe[:0].add(ts, axis='index')
        expected = DataFrame(np.nan,
                             index=self.tsframe.index,
                             columns=self.tsframe.columns)
        assert_frame_equal(result, expected)

        # empty but with non-empty index: a frame with no columns still
        # yields one row per entry of the series
        frame = self.tsframe[:1].reindex(columns=[])
        result = frame.mul(ts, axis='index')
        assert len(result) == len(ts)
Пример #56
0
    def test_join_non_int_index(self):
        """Join ``self.index`` with an object-dtype index in every direction."""
        obj_index = Index([3, 6, 7, 8, 10], dtype=object)

        # outer join: same result whichever side starts the join
        outer_lr = self.index.join(obj_index, how='outer')
        outer_rl = obj_index.join(self.index, how='outer')
        wanted = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
        tm.assert_index_equal(outer_lr, outer_rl)
        tm.assert_index_equal(outer_lr, wanted)

        # inner join: only the shared values remain
        inner_lr = self.index.join(obj_index, how='inner')
        inner_rl = obj_index.join(self.index, how='inner')
        wanted = Index([6, 8, 10])
        tm.assert_index_equal(inner_lr, inner_rl)
        tm.assert_index_equal(inner_lr, wanted)

        # left join keeps the calling side (as object dtype)
        tm.assert_index_equal(self.index.join(obj_index, how='left'),
                              self.index.astype(object))
        tm.assert_index_equal(obj_index.join(self.index, how='left'),
                              obj_index)

        # right join keeps the argument side (as object dtype)
        tm.assert_index_equal(self.index.join(obj_index, how='right'),
                              obj_index)
        tm.assert_index_equal(obj_index.join(self.index, how='right'),
                              self.index.astype(object))
Пример #57
0
 def test_integer_index_astype_datetime(self, tz, dtype):
     """An integer index cast with ``astype(dtype)`` gives the matching date."""
     # GH 20997, 20964
     stamp = pd.Timestamp('2018-01-01', tz=tz)
     converted = pd.Index([stamp.value]).astype(dtype)
     wanted = pd.DatetimeIndex(['2018-01-01'], tz=tz)
     tm.assert_index_equal(converted, wanted)
Пример #58
0
    def test_to_csv_multiindex(self):
        """Round-trip frames with MultiIndex rows and/or columns through CSV.

        The ``self.frame`` and ``self.tsframe`` fixtures have their index
        replaced in place and restored afterwards; every file is written to
        a temporary path supplied by ``ensure_clean``.
        """

        frame = self.frame
        old_index = frame.index
        # two rows of consecutive integers, one row per index level
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
        # replaces the index of the shared fixture; restored further down
        frame.index = new_index

        with ensure_clean("__tmp_to_csv_multiindex__") as path:

            # these two writes are not read back; they only have to succeed
            frame.to_csv(path, header=False)
            frame.to_csv(path, columns=["A", "B"])

            # round trip
            frame.to_csv(path)

            df = self.read_csv(path, index_col=[0, 1], parse_dates=False)

            # TODO to_csv drops column name
            assert_frame_equal(frame, df, check_names=False)
            assert frame.index.names == df.index.names

            # needed if setUp becomes a class method
            self.frame.index = old_index

            # try multiindex with dates
            tsframe = self.tsframe
            old_index = tsframe.index
            new_index = [old_index, np.arange(len(old_index))]
            tsframe.index = MultiIndex.from_arrays(new_index)

            tsframe.to_csv(path, index_label=["time", "foo"])
            recons = self.read_csv(path, index_col=[0, 1])

            # TODO to_csv drops column name
            assert_frame_equal(tsframe, recons, check_names=False)

            # do not load index: the two index levels come back as two
            # extra ordinary columns
            tsframe.to_csv(path)
            recons = self.read_csv(path, index_col=None)
            assert len(recons.columns) == len(tsframe.columns) + 2

            # no index
            tsframe.to_csv(path, index=False)
            recons = self.read_csv(path, index_col=None)
            assert_almost_equal(recons.values, self.tsframe.values)

            # needed if setUp becomes class method
            self.tsframe.index = old_index

        with ensure_clean("__tmp_to_csv_multiindex__") as path:
            # GH3571, GH1651, GH3141

            def _make_frame(names=None):
                # 3x3 frame of random int64 values with two-level columns;
                # ``names=True`` is shorthand for the default level names
                if names is True:
                    names = ["first", "second"]
                return DataFrame(
                    np.random.randint(0, 10, size=(3, 3)),
                    columns=MultiIndex.from_tuples([("bah", "foo"),
                                                    ("bah", "bar"),
                                                    ("ban", "baz")],
                                                   names=names),
                    dtype="int64",
                )

            # column & index are multi-index
            df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1])
            assert_frame_equal(df, result)

            # column is mi
            df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=0)
            assert_frame_equal(df, result)

            # three row-index levels together with four column levels
            # (the original note here read "dup column names?")
            df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2])
            assert_frame_equal(df, result)

            # writing with no index
            df = _make_frame()
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert_frame_equal(df, result)

            # we lose the names here
            df = _make_frame(True)
            df.to_csv(path, index=False)
            result = read_csv(path, header=[0, 1])
            assert com.all_none(*result.columns.names)
            # put the names back so the frames can be compared in full
            result.columns.names = df.columns.names
            assert_frame_equal(df, result)

            # whatsnew example
            df = _make_frame()
            df.to_csv(path)
            result = read_csv(path, header=[0, 1], index_col=[0])
            assert_frame_equal(df, result)

            df = _make_frame(True)
            df.to_csv(path)
            result = read_csv(path, header=[0, 1], index_col=[0])
            assert_frame_equal(df, result)

            # invalid options
            df = _make_frame(True)
            df.to_csv(path)

            # asking for more header rows than the file has lines must fail
            for i in [6, 7]:
                msg = "len of {i}, but only 5 lines in file".format(i=i)
                with pytest.raises(ParserError, match=msg):
                    read_csv(path, header=list(range(i)), index_col=0)

            # write with cols
            msg = "cannot specify cols with a MultiIndex"
            with pytest.raises(TypeError, match=msg):
                df.to_csv(path, columns=["foo", "bar"])

        with ensure_clean("__tmp_to_csv_multiindex__") as path:
            # empty: ``tsframe`` is the fixture bound in the first block
            tsframe[:0].to_csv(path)
            recons = self.read_csv(path)

            exp = tsframe[:0]
            exp.index = []

            # only the columns and the row count are compared
            tm.assert_index_equal(recons.columns, exp.columns)
            assert len(recons) == 0
Пример #59
0
    def test_to_datetime_unit(self):
        """Check ``to_datetime`` when an explicit ``unit`` is given.

        Covers epoch seconds held as ints and as floats, missing markers
        (``iNaT``, ``NaN``, ``'NaT'``) and day-based lists containing entries
        that either raise or are coerced to ``NaT``.
        """
        epoch = 1370745748
        origin = Timestamp('2013-06-09 02:42:28')
        whole_seconds = [epoch + t for t in range(20)]

        def check(values, offsets, missing=False):
            # convert as seconds, then compare with ``origin`` shifted by
            # each offset (plus a trailing NaT when ``missing`` is set)
            result = to_datetime(values, unit='s')
            wanted = [origin + timedelta(seconds=t) for t in offsets]
            if missing:
                wanted = wanted + [NaT]
            assert_series_equal(result, Series(wanted))

        # integer and float seconds
        check(Series(whole_seconds), range(20))
        check(Series(whole_seconds).astype(float), range(20))

        # a trailing iNaT becomes NaT, for ints and for floats
        check(Series(whole_seconds + [iNaT]), range(20), missing=True)
        check(Series(whole_seconds + [iNaT]).astype(float), range(20),
              missing=True)

        # GH13834
        fractional = [epoch + t for t in np.arange(0, 2, .25)]
        check(Series(fractional + [iNaT]).astype(float),
              np.arange(0, 2, .25), missing=True)

        # a NaN appended through concat also becomes NaT
        with_nan = concat([Series(whole_seconds).astype(float),
                           Series([np.nan])],
                          ignore_index=True)
        check(with_nan, range(20), missing=True)

        # day units with several spellings of "missing"
        first_days = [Timestamp('1970-01-02'), Timestamp('1970-01-03')]
        result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D')
        tm.assert_index_equal(result, DatetimeIndex(first_days + ['NaT'] * 3))

        # entries that cannot be converted raise by default
        for bad in ('foo', 111111111):
            with pytest.raises(ValueError):
                to_datetime([1, 2, bad], unit='D')

        # coerce we can process
        wanted = DatetimeIndex(first_days + ['NaT'] * 1)
        for bad in ('foo', 111111111):
            result = to_datetime([1, 2, bad], unit='D', errors='coerce')
            tm.assert_index_equal(result, wanted)
Пример #60
0
    def test_combineFrame(self):
        frame_copy = self.frame.reindex(self.frame.index[::2])

        del frame_copy['D']
        frame_copy['C'][:5] = nan

        added = self.frame + frame_copy

        indexer = added['A'].dropna().index
        exp = (self.frame['A'] * 2).copy()

        tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer])

        exp.loc[~exp.index.isin(indexer)] = np.nan
        tm.assert_series_equal(added['A'], exp.loc[added['A'].index])

        assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()

        # assert(False)

        assert np.isnan(added['D']).all()

        self_added = self.frame + self.frame
        tm.assert_index_equal(self_added.index, self.frame.index)

        added_rev = frame_copy + self.frame
        assert np.isnan(added['D']).all()
        assert np.isnan(added_rev['D']).all()

        # corner cases

        # empty
        plus_empty = self.frame + self.empty
        assert np.isnan(plus_empty.values).all()

        empty_plus = self.empty + self.frame
        assert np.isnan(empty_plus.values).all()

        empty_empty = self.empty + self.empty
        assert empty_empty.empty

        # out of order
        reverse = self.frame.reindex(columns=self.frame.columns[::-1])

        assert_frame_equal(reverse + self.frame, self.frame * 2)

        # mix vs float64, upcast
        added = self.frame + self.mixed_float
        _check_mixed_float(added, dtype='float64')
        added = self.mixed_float + self.frame
        _check_mixed_float(added, dtype='float64')

        # mix vs mix
        added = self.mixed_float + self.mixed_float2
        _check_mixed_float(added, dtype=dict(C=None))
        added = self.mixed_float2 + self.mixed_float
        _check_mixed_float(added, dtype=dict(C=None))

        # with int
        added = self.frame + self.mixed_int
        _check_mixed_float(added, dtype='float64')