示例#1
0
    def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
        """Year-1000 inputs raise under errors='raise' and become NaT under 'coerce'.

        The same calendar day (1 January 1000) is supplied as a ``date``,
        a ``datetime``, two string spellings and a ``np.datetime64``; each
        one is wrapped in a one-element object array and fed to
        ``tslib.array_to_datetime``.
        """
        year_1000_inputs = [
            datetime.date(1000, 1, 1),
            datetime.datetime(1000, 1, 1),
            '1000-01-01',
            'Jan 1, 1000',
            np.datetime64('1000-01-01'),
        ]

        for candidate in year_1000_inputs:
            # Strict mode must reject the value with a ValueError.
            strict_input = np.array([candidate], dtype='object')
            with self.assertRaises(ValueError):
                tslib.array_to_datetime(strict_input, errors='raise')

            # Coercion mode must yield a lone NaT instead of raising.
            coerced = tslib.array_to_datetime(
                np.array([candidate], dtype='object'), errors='coerce')
            nat_only = np.array([tslib.iNaT], dtype='M8[ns]')
            self.assert_numpy_array_equal(coerced, nat_only)

        # Mixed input: only the year-1000 entry is replaced by NaT, the
        # year-2000 entry is still converted.
        mixed = np.array(['1/1/1000', '1/1/2000'], dtype=object)
        coerced_mixed = tslib.array_to_datetime(mixed, errors='coerce')
        expected_mixed = np_array_datetime64_compat(
            [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'],
            dtype='M8[ns]')
        self.assert_numpy_array_equal(coerced_mixed, expected_mixed)
示例#2
0
    def test_parsing_valid_dates(self):
        """Well-formed date strings convert to the matching datetime64[ns] values.

        Two spellings are exercised: numeric month-day-year strings and
        strings that spell out the weekday and month names.
        """
        cases = [
            (
                ["01-01-2013", "01-02-2013"],
                ["2013-01-01T00:00:00.000000000-0000", "2013-01-02T00:00:00.000000000-0000"],
            ),
            (
                ["Mon Sep 16 2013", "Tue Sep 17 2013"],
                ["2013-09-16T00:00:00.000000000-0000", "2013-09-17T00:00:00.000000000-0000"],
            ),
        ]

        for raw_strings, expected_iso in cases:
            parsed = tslib.array_to_datetime(np.array(raw_strings, dtype=object))
            expected = np_array_datetime64_compat(expected_iso, dtype="M8[ns]")
            self.assert_numpy_array_equal(parsed, expected)
示例#3
0
    def test_datetime64_dtype_array_returned(self):
        """algos.unique on datetime data yields a de-duplicated M8[ns] array.

        The same three timestamps (one of them repeated) are passed as a
        DatetimeIndex, as a Series and as the Series' underlying values;
        each call must give the two distinct timestamps, in the order
        listed in ``expected``, with dtype ``M8[ns]``.
        """
        # GH 9431
        expected = np_array_datetime64_compat(
            ['2015-01-03T00:00:00.000000000+0000',
             '2015-01-01T00:00:00.000000000+0000'],
            dtype='M8[ns]')

        def check_unique(values):
            # Both the contents and the dtype of the result are verified.
            uniques = algos.unique(values)
            tm.assert_numpy_array_equal(uniques, expected)
            self.assertEqual(uniques.dtype, expected.dtype)

        dt_index = pd.to_datetime(
            ['2015-01-03T00:00:00.000000000+0000',
             '2015-01-01T00:00:00.000000000+0000',
             '2015-01-01T00:00:00.000000000+0000'])
        check_unique(dt_index)

        as_series = pd.Series(dt_index)
        check_unique(as_series)

        check_unique(as_series.values)
示例#4
0
    def test_dateparser_resolution_if_not_ns(self):
        # issue 10245
        data = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

        def date_parser(date, time):
            datetime = np_array_datetime64_compat(
                date + 'T' + time + 'Z', dtype='datetime64[s]')
            return datetime

        df = read_csv(StringIO(data), date_parser=date_parser,
                      parse_dates={'datetime': ['date', 'time']},
                      index_col=['datetime', 'prn'])

        datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3,
                                               dtype='datetime64[s]')
        df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3},
                               index=MultiIndex.from_tuples(
                                   [(datetimes[0], 126),
                                    (datetimes[1], 23),
                                    (datetimes[2], 13)],
                               names=['datetime', 'prn']))
        assert_frame_equal(df, df_correct)
示例#5
0
    def test_parsing_valid_dates(self):
        """Valid date strings are converted to the expected M8[ns] array.

        Covers numeric month-day-year strings as well as strings that
        carry weekday and month names.
        """
        scenarios = (
            (['01-01-2013', '01-02-2013'],
             ['2013-01-01T00:00:00.000000000-0000',
              '2013-01-02T00:00:00.000000000-0000']),
            (['Mon Sep 16 2013', 'Tue Sep 17 2013'],
             ['2013-09-16T00:00:00.000000000-0000',
              '2013-09-17T00:00:00.000000000-0000']),
        )

        for date_strings, iso_strings in scenarios:
            converted = tslib.array_to_datetime(
                np.array(date_strings, dtype=object))
            wanted = np_array_datetime64_compat(iso_strings, dtype='M8[ns]')
            self.assert_numpy_array_equal(converted, wanted)
示例#6
0
    def test_coerce_of_invalid_datetimes(self):
        """Unparseable entries are left alone with 'ignore' and become NaT with 'coerce'."""
        raw = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

        # errors="ignore": a single bad entry means nothing is converted,
        # so the result must equal the input array.
        untouched = tslib.array_to_datetime(raw, errors="ignore")
        self.assert_numpy_array_equal(untouched, raw)

        # errors="coerce": the good entry is converted and each bad entry
        # is replaced by iNaT.
        coerced = tslib.array_to_datetime(raw, errors="coerce")
        expected = np_array_datetime64_compat(
            ["2013-01-01T00:00:00.000000000-0000", tslib.iNaT, tslib.iNaT], dtype="M8[ns]"
        )
        self.assert_numpy_array_equal(coerced, expected)
示例#7
0
    def test_coerce_of_invalid_datetimes(self):
        """Check how invalid entries are handled under 'ignore' and 'coerce'."""
        mixed_input = np.array(['01-01-2013', 'not_a_date', '1'],
                               dtype=object)

        # Under errors='ignore' the presence of any invalid entry leaves
        # the whole array unconverted: output equals input.
        ignored = tslib.array_to_datetime(mixed_input, errors='ignore')
        self.assert_numpy_array_equal(ignored, mixed_input)

        # Under errors='coerce' the valid entry is parsed while every
        # invalid entry turns into iNaT.
        coerced = tslib.array_to_datetime(mixed_input, errors='coerce')
        wanted = np_array_datetime64_compat(
            ['2013-01-01T00:00:00.000000000-0000', tslib.iNaT, tslib.iNaT],
            dtype='M8[ns]')
        self.assert_numpy_array_equal(coerced, wanted)
示例#8
0
    def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
        """Year-1000 values must raise in strict mode and coerce to NaT otherwise.

        1 January 1000 is given as a date, a datetime, two string forms and
        a np.datetime64; every form goes through ``tslib.array_to_datetime``
        inside a one-element object array.
        """
        out_of_range = [
            datetime.date(1000, 1, 1),
            datetime.datetime(1000, 1, 1),
            "1000-01-01",
            "Jan 1, 1000",
            np.datetime64("1000-01-01"),
        ]

        for value in out_of_range:
            # errors="raise" -> ValueError
            boxed = np.array([value], dtype="object")
            with self.assertRaises(ValueError):
                tslib.array_to_datetime(boxed, errors="raise")

            # errors="coerce" -> single NaT
            coerced = tslib.array_to_datetime(np.array([value], dtype="object"), errors="coerce")
            only_nat = np.array([tslib.iNaT], dtype="M8[ns]")
            self.assert_numpy_array_equal(coerced, only_nat)

        # A year-1000 entry next to a year-2000 entry: only the former
        # becomes NaT.
        pair = np.array(["1/1/1000", "1/1/2000"], dtype=object)
        coerced_pair = tslib.array_to_datetime(pair, errors="coerce")
        expected_pair = np_array_datetime64_compat(
            [tslib.iNaT, "2000-01-01T00:00:00.000000000-0000"], dtype="M8[ns]"
        )
        self.assert_numpy_array_equal(coerced_pair, expected_pair)
示例#9
0
 def date_parser(date, time):
     """Combine *date* and *time* into second-resolution datetime64 data.

     The arguments are joined as ``date + 'T' + time + 'Z'`` and passed to
     ``np_array_datetime64_compat`` with dtype ``datetime64[s]``.
     NOTE(review): presumably both arguments are strings or string
     arrays supplied by the CSV reader -- confirm against the caller.
     """
     return np_array_datetime64_compat(
         date + 'T' + time + 'Z', dtype='datetime64[s]')
示例#10
0
文件: test_base.py 项目: Arup/pandas
    def test_value_counts_inferred(self):
        """Check value_counts / unique / nunique on Index and Series.

        For each container class the test walks through, in order:
        plain string data, binned integer data, data containing NaN,
        an empty container, datetime64[ns] data (with and without NaT)
        and timedelta64[ns] data.
        """
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins: binning the string data above must raise TypeError
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            # bins on integer data (independent of klass)
            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series({0.998: 2,
                           1.5: 1,
                           2.0: 0,
                           2.5: 1}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series(
                {0.998: 0.5,
                 1.5: 0.25,
                 2.0: 0.0,
                 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly: NaN is left out of value_counts and
            # nunique by default but still shows up in unique()
            s_values = ['a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a',
                        'b']
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.array(
                ['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            # empty container
            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected,
                                   check_index_type=False)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
                             'xxyyzz20100101EGG', 'xxyyww20090101EGG',
                             'foofoo20080909PIE', 'foofoo20080909GUM'])
            f = StringIO(txt)
            df = pd.read_fwf(f, widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # Every timestamp uses the 'Z' designator, matching the
            # strings used for `expected` below.
            idx = pd.to_datetime(
                ['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                 '2009-01-01 00:00:00Z'])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
                                                   '2009-01-01 00:00:00Z',
                                                   '2008-09-09 00:00:00Z'],
                                                  dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass(list(s.values) + [pd.NaT])

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # expected_s is extended in place so that it also counts NaT
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') ==
                            pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]: all six rows are exactly one day
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # same data with the operands of the addition swapped
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
示例#11
0
    def test_value_counts_inferred(self):
        """Exercise value_counts, unique and nunique for Index and Series.

        The same sequence of checks runs once per container class:
        string data, binned integers, NaN-containing data, an empty
        container, datetime64[ns] data (then with an added NaT) and
        finally timedelta64[ns] data.
        """
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort, have to sort after the fact as not sorting is
            # platform-dep
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins: requesting bins on the string data must raise TypeError
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            # bins on integer data (does not depend on klass)
            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series(
                {
                    0.998: 2,
                    1.5: 1,
                    2.0: 0,
                    2.5: 1
                },
                index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series(
                {
                    0.998: 0.5,
                    1.5: 0.25,
                    2.0: 0.0,
                    2.5: 0.25
                },
                index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly: NaN is excluded from value_counts and
            # nunique by default, yet remains present in unique()
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            # empty container
            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(),
                                   expected,
                                   check_index_type=False)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            # All three timestamps carry the 'Z' designator, in line with
            # the strings that build `expected` below.
            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00Z'
            ])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np_array_datetime64_compat(
                [
                    '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                    '2008-09-09 00:00:00Z'
                ],
                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass(list(s.values) + [pd.NaT])

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            # expected_s is mutated here so that it also counts the NaT
            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT
                            or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]: each of the six rows equals one day
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            # same data, built with the addition operands swapped
            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)