Example No. 1
    def to_timestamp(self, freq=None, how='start', tz=None):
        """
        Return the Timestamp representation of the Period at the target
        frequency at the specified end (how) of the Period.

        Parameters
        ----------
        freq : string or DateOffset, default is 'D' if self.freq is week or
               longer and 'S' otherwise
            Target frequency
        how : str, default 'S' (start)
            Either 'S' or 'E'. Can be aliased as the case-insensitive
            'Start', 'Finish', 'Begin' or 'End'.

        Returns
        -------
        Timestamp
        """
        how = _validate_end_alias(how)

        if freq is None:
            base, mult = _gfc(self.freq)
            freq = _freq_mod.get_to_timestamp_base(base)

        base, mult = _gfc(freq)
        val = self.asfreq(freq, how)

        dt64 = tslib.period_ordinal_to_dt64(val.ordinal, base)
        return Timestamp(dt64, tz=tz)
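A minimal usage sketch of the public API this method backs (the exact end-of-period resolution varies across pandas versions, so the commented values are indicative only):

import pandas as pd

p = pd.Period('2012Q1', freq='Q')
p.to_timestamp(how='start')   # Timestamp at the first instant of the quarter
p.to_timestamp(how='end')     # Timestamp at the end of the quarter
p.to_timestamp(freq='M')      # convert via monthly frequency: start of 2012-01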
Example No. 2
    def to_timestamp(self, freq=None, how='S'):
        """
        Return the Timestamp at the start/end of the period

        Parameters
        ----------
        freq : string or DateOffset, default frequency of PeriodIndex
            Target frequency
        how : str, default 'S' (start)
            Either 'S' or 'E'. Can be aliased as the case-insensitive
            'Start', 'Finish', 'Begin' or 'End'.

        Returns
        -------
        Timestamp
        """
        if freq is None:
            base, mult = _gfc(self.freq)
            new_val = self
        else:
            base, mult = _gfc(freq)
            new_val = self.asfreq(freq, how)

        dt64 = plib.period_ordinal_to_dt64(new_val.ordinal, base)
        ts_freq = _period_rule_to_timestamp_rule(new_val.freq, how=how)
        return Timestamp(dt64, offset=to_offset(ts_freq))
Example No. 3
    def test_multiple_date_col_timestamp_parse(self):
        data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
        result = self.read_csv(StringIO(data), sep=',', header=None,
                               parse_dates=[[0, 1]], date_parser=Timestamp)

        ex_val = Timestamp('05/31/2012 15:30:00.029')
        self.assertEqual(result['0_1'][0], ex_val)
Example No. 4
    def test_datetime_nanosecond_unit(self):
        from datetime import datetime
        from pandas.lib import Timestamp

        val = datetime.now()
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val))
        self.assert_(roundtrip == stamp.value)
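The value compared against the JSON round-trip is Timestamp.value, the int64 count of nanoseconds since the Unix epoch. A small, hedged sketch of that attribute (commented values are indicative):

import pandas as pd

ts = pd.Timestamp('2013-01-01 00:00:00.000000001')
ts.value                      # 1356998400000000001 nanoseconds since the epoch
pd.Timestamp(ts.value) == ts  # True: the integer round-trips exactly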
Example No. 5
def _to_m8(key):
    '''
    Timestamp-like => dt64
    '''
    if not isinstance(key, datetime):
        # this also converts strings
        key = Timestamp(key)

    return np.int64(lib.pydt_to_i8(key)).view(_NS_DTYPE)
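_to_m8 is an internal helper; roughly the same conversion can be sketched with public Timestamp attributes. This is an illustration, not the library's internal code path:

import numpy as np
import pandas as pd

ts = pd.Timestamp('2012-05-31 15:30:00')
ts.asm8                            # numpy.datetime64 at nanosecond resolution
ts.value                           # the same instant as int64 nanoseconds
np.int64(ts.value).view('M8[ns]')  # view the int64 back as datetime64[ns]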
Example No. 6
    def max(self, axis=None):
        """
        Overridden ndarray.max to return a Timestamp
        """
        if self.is_monotonic:
            return self[-1]
        else:
            max_stamp = self.asi8.max()
            return Timestamp(max_stamp, tz=self.tz)
Example No. 7
    def min(self, axis=None):
        """
        Overridden ndarray.min to return a Timestamp
        """
        if self.is_monotonic:
            return self[0]
        else:
            min_stamp = self.asi8.min()
            return Timestamp(min_stamp, tz=self.tz)
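Together with the max override above, this is what makes reductions on a DatetimeIndex come back as Timestamp objects rather than raw datetime64 values. A quick illustration:

import pandas as pd

idx = pd.date_range('2000-01-01', periods=5, freq='D')
idx.min()  # Timestamp('2000-01-01 00:00:00')
idx.max()  # Timestamp('2000-01-05 00:00:00')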
Example No. 8
def _dt_box_array(arr, offset=None, tz=None):
    if arr is None:
        return arr

    if not isinstance(arr, np.ndarray):
        return arr

    boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz)
    return lib.map_infer(arr, boxfunc)
Example No. 9
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    from pandas.tseries.tools import normalize_date

    # First and last offsets should be calculated from the start day to fix an
    # error caused by resampling across multiple days when a one-day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pydata/pandas/issues/8683

    start_day_nanos = Timestamp(normalize_date(first)).value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult, tz=first.tz), Timestamp(lresult, tz=last.tz))
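The start-of-day anchoring described in the comment keeps bins stable when the resample frequency does not divide a day evenly (GH 8683). A hedged, user-facing sketch using the .resample() API available since pandas 0.18:

import numpy as np
import pandas as pd

idx = pd.date_range('2000-01-01 06:00', periods=50, freq='7min')
s = pd.Series(np.arange(50.0), index=idx)
# 17 minutes does not divide 24 hours; the bin edges are anchored to the
# start of the first day (plus any `base` offset), not to the first stamp.
s.resample('17min', closed='right').sum()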
Example No. 10
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    from pandas.tseries.tools import normalize_date

    start_day_nanos = Timestamp(normalize_date(first)).value
    last_day_nanos = Timestamp(normalize_date(last)).value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos
    last_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - last_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult, tz=first.tz),
            Timestamp(lresult, tz=last.tz))
Example No. 11
    def _partial_date_slice(self, reso, parsed):
        if not self.is_monotonic:
            raise TimeSeriesError(
                'Partial indexing only valid for ordered time'
                ' series')

        if reso == 'year':
            t1 = Timestamp(datetime(parsed.year, 1, 1))
            t2 = Timestamp(datetime(parsed.year, 12, 31))
        elif reso == 'month':
            d = lib.monthrange(parsed.year, parsed.month)[1]
            t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
            t2 = Timestamp(datetime(parsed.year, parsed.month, d))
        elif reso == 'quarter':
            qe = (((parsed.month - 1) + 2) % 12) + 1  # two months ahead
            d = lib.monthrange(parsed.year, qe)[1]  # at end of month
            t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
            t2 = Timestamp(datetime(parsed.year, qe, d))
        else:
            raise KeyError

        stamps = self.asi8
        left = stamps.searchsorted(t1.value, side='left')
        right = stamps.searchsorted(t2.value, side='right')
        return slice(left, right)
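_partial_date_slice is the machinery behind partial string indexing on a monotonic DatetimeIndex. A hedged sketch of the user-facing behaviour:

import pandas as pd

s = pd.Series(range(365), index=pd.date_range('2011-01-01', periods=365, freq='D'))
s['2011-02']  # the 28 rows of February 2011, selected by month
s['2011']     # every row in the year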
Example No. 12
def _get_range_edges(axis, offset, closed='left', base=0):
    if isinstance(offset, basestring):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        day_nanos = _delta_to_nanoseconds(timedelta(1))
        # #1165
        if (day_nanos % offset.nanos) == 0:
            return _adjust_dates_anchored(axis[0],
                                          axis[-1],
                                          offset,
                                          closed=closed,
                                          base=base)

    if closed == 'left':
        first = Timestamp(offset.rollback(axis[0]))
    else:
        first = Timestamp(axis[0] - offset)

    last = Timestamp(axis[-1] + offset)

    return first, last
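_get_range_edges chooses the outermost bin edges for resampling; the closed side then decides whether each edge label belongs to the bin on its left or on its right. A hedged illustration:

import pandas as pd

s = pd.Series(range(6), index=pd.date_range('2000-01-01', periods=6, freq='H'))
s.resample('2H', closed='left').sum()   # bins like [00:00, 02:00)
s.resample('2H', closed='right').sum()  # bins like (00:00, 02:00]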
Example No. 13
    def test_usecols_with_parse_dates(self):
        # See gh-9755
        s = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]

        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        df = self.read_csv(StringIO(s), usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(s), usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
Example No. 14
    def test_index_groupby(self):
        int_idx = Index(range(6))
        float_idx = Index(np.arange(0, 0.6, 0.1))
        obj_idx = Index('A B C D E F'.split())
        dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

        for idx in [int_idx, float_idx, obj_idx, dt_idx]:
            to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
            self.assertEqual(idx.groupby(to_groupby),
                             {1.0: [idx[0], idx[5]], 2.0: [idx[1], idx[4]]})

            to_groupby = Index([datetime(2011, 11, 1),
                                datetime(2011, 12, 1),
                                pd.NaT,
                                pd.NaT,
                                datetime(2011, 12, 1),
                                datetime(2011, 11, 1)],
                               tz='UTC').values

            ex_keys = pd.tslib.datetime_to_datetime64(
                np.array([Timestamp('2011-11-01'), Timestamp('2011-12-01')]))
            expected = {ex_keys[0][0]: [idx[0], idx[5]],
                        ex_keys[0][1]: [idx[1], idx[4]]}
            self.assertEqual(idx.groupby(to_groupby), expected)
Example No. 15
    def test_index_groupby(self):
        int_idx = Index(range(6))
        float_idx = Index(np.arange(0, 0.6, 0.1))
        obj_idx = Index('A B C D E F'.split())
        dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

        for idx in [int_idx, float_idx, obj_idx, dt_idx]:
            to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
            tm.assert_dict_equal(idx.groupby(to_groupby), {
                1.0: idx[[0, 5]],
                2.0: idx[[1, 4]]
            })

            to_groupby = Index([
                datetime(2011, 11, 1),
                datetime(2011, 12, 1), pd.NaT, pd.NaT,
                datetime(2011, 12, 1),
                datetime(2011, 11, 1)
            ],
                               tz='UTC').values

            ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]
            expected = {ex_keys[0]: idx[[0, 5]], ex_keys[1]: idx[[1, 4]]}
            tm.assert_dict_equal(idx.groupby(to_groupby), expected)
Example No. 16
    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        result = idx.astype(object)
        expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object)
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index([1463356800000000000] +
                              [-9223372036854775808] * 3,
                              dtype=np.int64)
        tm.assert_index_equal(result, expected)

        rng = date_range('1/1/2000', periods=10)
        result = rng.astype('i8')
        self.assert_numpy_array_equal(result, rng.asi8)
Example No. 17
    def __getitem__(self, key):
        """Override numpy.ndarray's __getitem__ method to work as desired"""
        arr_idx = self.view(np.ndarray)
        if np.isscalar(key):
            val = arr_idx[key]
            return Timestamp(val, offset=self.offset, tz=self.tz)
        else:
            if com._is_bool_indexer(key):
                key = np.asarray(key)
                key = lib.maybe_booleans_to_slice(key.view(np.uint8))

            new_offset = None
            if isinstance(key, slice):
                if self.offset is not None and key.step is not None:
                    new_offset = key.step * self.offset
                else:
                    new_offset = self.offset

            result = arr_idx[key]
            if result.ndim > 1:
                return result

            return self._simple_new(result, self.name, new_offset, self.tz)
Example No. 18
    def get_loc(self, key):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """
        try:
            return self._engine.get_loc(key)
        except KeyError:
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError):
                pass

            if isinstance(key, time):
                return self.indexer_at_time(key)

            try:
                return self._engine.get_loc(Timestamp(key))
            except (KeyError, ValueError):
                raise KeyError(key)
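get_loc resolves exact Timestamps, parseable strings and datetime.time keys through the fallbacks above. A hedged sketch of the corresponding public calls:

import datetime
import pandas as pd

idx = pd.date_range('2011-01-01 09:00', periods=4, freq='12H')
idx.get_loc(pd.Timestamp('2011-01-01 21:00'))  # 1, an exact label lookup
idx.get_loc('2011-01-02')                      # a partial date string resolves to a slice
idx.indexer_at_time(datetime.time(9, 0))       # positions whose time of day is 09:00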
Example No. 19
    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing
        """
        try:
            return Index.get_value(self, series, key)
        except KeyError:

            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            if isinstance(key, time):
                locs = self._indices_at_time(key)
                return series.take(locs)

            stamp = Timestamp(key)
            try:
                return self._engine.get_value(series, stamp)
            except KeyError:
                raise KeyError(stamp)
Example No. 20
    def test_constructor_invalid(self):

        # invalid
        self.assertRaises(TypeError, Float64Index, 0.)
        self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.])
        self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')])
Example No. 21
    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # GH 11488
        result = Index(
            [pd.NaT,
             Timestamp('2011-01-01'), pd.NaT,
             Timestamp('2011-01-02')],
            name='idx')
        exp = DatetimeIndex(
            [pd.NaT,
             Timestamp('2011-01-01'), pd.NaT,
             Timestamp('2011-01-02')],
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNone(result.tz)

        # same tz results in DatetimeIndex
        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
        ],
                       name='idx')
        exp = DatetimeIndex([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00')
        ],
                            tz='Asia/Tokyo',
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)

        # same tz results in DatetimeIndex (DST)
        result = Index([
            Timestamp('2011-01-01 10:00', tz='US/Eastern'), pd.NaT,
            Timestamp('2011-08-01 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = DatetimeIndex([
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-08-01 10:00')
        ],
                            tz='US/Eastern',
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)

        # different tz results in Index(dtype=object)
        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                    dtype='object',
                    name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertFalse(isinstance(result, DatetimeIndex))

        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                       name='idx')
        exp = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                    dtype='object',
                    name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertFalse(isinstance(result, DatetimeIndex))

        # passing tz results in DatetimeIndex
        result = Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                       tz='Asia/Tokyo',
                       name='idx')
        exp = DatetimeIndex([
            pd.NaT,
            Timestamp('2011-01-01 19:00'), pd.NaT,
            Timestamp('2011-01-03 00:00')
        ],
                            tz='Asia/Tokyo',
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # all NaT
        result = Index([pd.NaT, pd.NaT], name='idx')
        exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNone(result.tz)

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))
        self.assertIsNotNone(result.tz)
        self.assertEqual(result.tz, exp.tz)
Example No. 22
    def test_usecols_with_parse_dates(self):
        # See gh-9755
        s = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]

        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        df = self.read_csv(StringIO(s),
                           usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(s),
                           usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        # See gh-14792
        s = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [0]
        usecols = list('abcdefghij')
        cols = {
            'a': Timestamp('2016-09-21'),
            'b': [1],
            'c': [1],
            'd': [2],
            'e': [3],
            'f': [4],
            'g': [5],
            'h': [6],
            'i': [7],
            'j': [8]
        }
        expected = DataFrame(cols, columns=usecols)
        df = self.read_csv(StringIO(s),
                           usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [[0, 1]]
        usecols = list('abcdefghij')
        cols = {
            'a_b': '2016/09/21 1',
            'c': [1],
            'd': [2],
            'e': [3],
            'f': [4],
            'g': [5],
            'h': [6],
            'i': [7],
            'j': [8]
        }
        expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))
        df = self.read_csv(StringIO(s),
                           usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
Example No. 23
    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp('2011-01-01'),
             Timestamp('2011-01-02')], name='idx')
        exp = DatetimeIndex([Timestamp('2011-01-01'),
                             Timestamp('2011-01-02')],
                            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'),
             Timestamp('2011-01-02 10:00')],
            tz='Asia/Tokyo',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='US/Eastern'),
            Timestamp('2011-08-01 10:00', tz='US/Eastern')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 10:00'),
             Timestamp('2011-08-01 10:00')],
            tz='US/Eastern',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # different tz coerces tz-naive to tz-aware
        result = DatetimeIndex([
            Timestamp('2011-01-01 10:00'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ],
                               name='idx')
        exp = DatetimeIndex(
            [Timestamp('2011-01-01 05:00'),
             Timestamp('2011-01-02 10:00')],
            tz='US/Eastern',
            name='idx')
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

        # tz mismatch with tz-aware values raises TypeError/ValueError
        with tm.assertRaises(ValueError):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          name='idx')

        with tm.assertRaises(TypeError):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          tz='Asia/Tokyo',
                          name='idx')

        with tm.assertRaises(ValueError):
            DatetimeIndex([
                Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                Timestamp('2011-01-02 10:00', tz='US/Eastern')
            ],
                          tz='US/Eastern',
                          name='idx')
Example No. 24
    def end_time(self):
        ordinal = (self + 1).start_time.value - 1
        return Timestamp(ordinal)
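end_time is one nanosecond before the next period's start_time, which is exactly what the int64 arithmetic above computes. Indicative values:

import pandas as pd

p = pd.Period('2012-01', freq='M')
p.start_time  # Timestamp('2012-01-01 00:00:00')
p.end_time    # Timestamp('2012-01-31 23:59:59.999999999')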
Example No. 25
    def onOffset(cls, dt):
        if isinstance(dt, np.datetime64):
            dt = Timestamp(dt)
        return dt.weekday() < 5
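The business-day check boxes np.datetime64 input into a Timestamp so that .weekday() is available, then treats Monday through Friday (weekday() < 5) as on-offset. For example:

import numpy as np
import pandas as pd

pd.Timestamp('2012-01-06').weekday()                 # 4 -> Friday, a business day
pd.Timestamp(np.datetime64('2012-01-07')).weekday()  # 5 -> Saturday, not a business day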
Example No. 26
    def _cached_range(cls,
                      start=None,
                      end=None,
                      periods=None,
                      offset=None,
                      name=None):
        if start is not None:
            start = Timestamp(start)
        if end is not None:
            end = Timestamp(end)

        if offset is None:
            raise Exception('Must provide a DateOffset!')

        drc = _daterange_cache
        if offset not in _daterange_cache:
            xdr = generate_range(offset=offset,
                                 start=_CACHE_START,
                                 end=_CACHE_END)

            arr = np.array(_to_m8_array(list(xdr)),
                           dtype=_NS_DTYPE,
                           copy=False)

            cachedRange = arr.view(DatetimeIndex)
            cachedRange.offset = offset
            cachedRange.tz = None
            cachedRange.name = None
            drc[offset] = cachedRange
        else:
            cachedRange = drc[offset]

        if start is None:
            if end is None:
                raise Exception('Must provide start or end date!')
            if periods is None:
                raise Exception('Must provide number of periods!')

            assert (isinstance(end, Timestamp))

            end = offset.rollback(end)

            endLoc = cachedRange.get_loc(end) + 1
            startLoc = endLoc - periods
        elif end is None:
            assert (isinstance(start, Timestamp))
            start = offset.rollforward(start)

            startLoc = cachedRange.get_loc(start)
            if periods is None:
                raise Exception('Must provide number of periods!')

            endLoc = startLoc + periods
        else:
            if not offset.onOffset(start):
                start = offset.rollforward(start)

            if not offset.onOffset(end):
                end = offset.rollback(end)

            startLoc = cachedRange.get_loc(start)
            endLoc = cachedRange.get_loc(end) + 1

        indexSlice = cachedRange[startLoc:endLoc]
        indexSlice.name = name
        indexSlice.offset = offset

        return indexSlice
Example No. 27
    def test_usecols_with_parse_dates(self):
        # See gh-9755
        s = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]

        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        df = self.read_csv(StringIO(s),
                           usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(s),
                           usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        # See gh-13604
        s = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65
"""
        parse_dates = [0]
        names = ['date', 'values']
        usecols = names[:]

        index = Index([
            Timestamp('2008-02-07 09:40'),
            Timestamp('2008-02-07 09:50'),
            Timestamp('2008-02-07 10:00')
        ],
                      name='date')
        cols = {'values': [1032.43, 1042.54, 1051.65]}
        expected = DataFrame(cols, index=index)

        df = self.read_csv(StringIO(s),
                           parse_dates=parse_dates,
                           index_col=0,
                           usecols=usecols,
                           header=None,
                           names=names)
        tm.assert_frame_equal(df, expected)

        # See gh-14792
        s = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [0]
        usecols = list('abcdefghij')
        cols = {
            'a': Timestamp('2016-09-21'),
            'b': [1],
            'c': [1],
            'd': [2],
            'e': [3],
            'f': [4],
            'g': [5],
            'h': [6],
            'i': [7],
            'j': [8]
        }
        expected = DataFrame(cols, columns=usecols)
        df = self.read_csv(StringIO(s),
                           usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [[0, 1]]
        usecols = list('abcdefghij')
        cols = {
            'a_b': '2016/09/21 1',
            'c': [1],
            'd': [2],
            'e': [3],
            'f': [4],
            'g': [5],
            'h': [6],
            'i': [7],
            'j': [8]
        }
        expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))
        df = self.read_csv(StringIO(s),
                           usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
Example No. 28
    def _get_object_index(self):
        boxfunc = lambda x: Timestamp(x, offset=self.offset, tz=self.tz)
        boxed_values = lib.map_infer(self.asi8, boxfunc)
        return Index(boxed_values, dtype=object)
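The boxing function maps the index's int64 nanosecond values (asi8) to Timestamp objects; Timestamp accepts such integers directly. A small sketch:

import pandas as pd

pd.Timestamp(0)           # Timestamp('1970-01-01 00:00:00'), the Unix epoch
pd.Timestamp(1000000000)  # one second (10**9 nanoseconds) after the epoch
pd.date_range('2000-01-01', periods=2).asi8  # the int64 values such boxing starts from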
Example No. 29
    def _generate(cls,
                  start,
                  end,
                  periods,
                  name,
                  offset,
                  tz=None,
                  normalize=False):
        if com._count_not_none(start, end, periods) < 2:
            raise ValueError('Must specify two of start, end, or periods')

        _normalized = True

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        inferred_tz = tools._infer_tzinfo(start, end)

        if tz is not None and inferred_tz is not None:
            assert (inferred_tz == tz)
        elif inferred_tz is not None:
            tz = inferred_tz

        tz = tools._maybe_get_tz(tz)

        if start is not None:
            if normalize:
                start = normalize_date(start)
                _normalized = True
            else:
                _normalized = _normalized and start.time() == _midnight

        if end is not None:
            if normalize:
                end = normalize_date(end)
                _normalized = True
            else:
                _normalized = _normalized and end.time() == _midnight

        if hasattr(offset, 'delta') and offset != offsets.Day():
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz)

            if start and end:
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz)

            if (offset._should_cache()
                    and not (offset._normalize_cache and not _normalized)
                    and _naive_in_cache_range(start, end)):
                index = cls._cached_range(start,
                                          end,
                                          periods=periods,
                                          offset=offset,
                                          name=name)
            else:
                index = _generate_regular_range(start, end, periods, offset)

        else:

            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            if start and end:
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            if (offset._should_cache()
                    and not (offset._normalize_cache and not _normalized)
                    and _naive_in_cache_range(start, end)):
                index = cls._cached_range(start,
                                          end,
                                          periods=periods,
                                          offset=offset,
                                          name=name)
            else:
                index = _generate_regular_range(start, end, periods, offset)

            if tz is not None and getattr(index, 'tz', None) is None:
                index = lib.tz_localize_to_utc(com._ensure_int64(index), tz)
                index = index.view(_NS_DTYPE)

        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index
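_generate is the classmethod behind date_range; a hedged sketch of the corresponding public call, noting the two-of-three rule enforced at the top of the method:

import pandas as pd

# At least two of start, end and periods must be supplied; tz localizes the result.
pd.date_range(start='2011-01-01', periods=3, freq='D', tz='US/Eastern')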
Example No. 30

def _str_to_dt_array(arr, offset=None, dayfirst=None, yearfirst=None):
    def parser(x):
        result = parse_time_string(x,
                                   offset,
                                   dayfirst=dayfirst,
                                   yearfirst=yearfirst)
        return result[0]

    arr = np.asarray(arr, dtype=object)
    data = _algos.arrmap_object(arr, parser)
    return tools.to_datetime(data)


_CACHE_START = Timestamp(datetime(1950, 1, 1))
_CACHE_END = Timestamp(datetime(2030, 1, 1))

_daterange_cache = {}


def _naive_in_cache_range(start, end):
    if start is None or end is None:
        return False
    else:
        if start.tzinfo is not None or end.tzinfo is not None:
            return False
        return _in_range(start, end, _CACHE_START, _CACHE_END)


def _in_range(start, end, rng_start, rng_end):