示例#1
0
文件: resample.py 项目: u36073/pandas
def _get_range_edges(first, last, offset, closed='left', base=0):
    """
    Return the outer ``(first, last)`` edges enclosing a date range.

    Parameters
    ----------
    first, last : Timestamp-like
        Start and end points of the data.  Both must support
        ``normalize()`` and arithmetic with ``offset``.
    offset : str or offset object
        Step between edges.  A string is converted with ``to_offset``.
    closed : str, default 'left'
        With ``'left'`` the lower edge is ``offset.rollback(first)``;
        any other value moves the lower edge back by one full ``offset``.
    base : int, default 0
        Only used on the anchored path, where it is forwarded unchanged
        to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(lower_edge, upper_edge)``.
    """
    if isinstance(offset, compat.string_types):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        nanos_per_day = delta_to_nanoseconds(timedelta(1))

        # see #1165: every non-Day tick is anchored; a Day tick is anchored
        # only when its span divides a single day evenly.  Any other Day
        # tick falls through to the generic handling below *without* being
        # normalized.
        if (not isinstance(offset, Day) or
                nanos_per_day % offset.nanos == 0):
            return _adjust_dates_anchored(first, last, offset,
                                          closed=closed, base=base)
    else:
        # hack! non-tick offsets work from midnight-normalized end points
        first = first.normalize()
        last = last.normalize()

    if closed == 'left':
        lower_edge = Timestamp(offset.rollback(first))
    else:
        lower_edge = Timestamp(first - offset)

    upper_edge = Timestamp(last + offset)

    return lower_edge, upper_edge
示例#2
0
    def test_usecols_with_parse_dates_and_usecol_names(self):
        # See gh-9755
        raw = """0,1,20140101,0900,4
        0,1,20140102,1000,4"""
        col_names = list('acd')
        date_spec = [[1, 2]]

        stamps = [Timestamp('2014-01-01 09:00:00'),
                  Timestamp('2014-01-02 10:00:00')]
        expected = DataFrame({'a': [0, 0], 'c_d': stamps},
                             columns=['c_d', 'a'])

        # Both passes select the same three columns; only the order in
        # which they are listed in ``usecols`` differs, and the resulting
        # frame must be the same either way.
        for picked in ([0, 2, 3], [3, 0, 2]):
            result = self.read_csv(StringIO(raw),
                                   names=col_names,
                                   usecols=picked,
                                   parse_dates=date_spec)
            tm.assert_frame_equal(result, expected)
示例#3
0
文件: resample.py 项目: u36073/pandas
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    # First and last offsets should be calculated from the start day to fix an
    # error cause by resampling across multiple days when a one day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pandas-dev/pandas/issues/8683

    # 14682 - Since we need to drop the TZ information to perform
    # the adjustment in the presence of a DST change,
    # save TZ Info and the DST state of the first and last parameters
    # so that we can accurately rebuild them at the end.
    first_tzinfo = first.tzinfo
    last_tzinfo = last.tzinfo
    first_dst = bool(first.dst())
    last_dst = bool(last.dst())

    first = first.tz_localize(None)
    last = last.tz_localize(None)

    start_day_nanos = first.normalize().value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst),
            Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
示例#4
0
    def test_override__set_noconvert_columns(self):
        # GH 17351 - usecols needs to be sorted in _set_noconvert_columns.
        # Modelled on the test_usecols_with_parse_dates test from usecols.py
        from pandas.io.parsers import CParserWrapper, TextFileReader

        raw = """a,b,c,d,e
        0,1,20140101,0900,4
        0,1,20140102,1000,4"""

        date_spec = [[1, 2]]
        stamps = [Timestamp('2014-01-01 09:00:00'),
                  Timestamp('2014-01-02 10:00:00')]
        expected = DataFrame({'a': [0, 0], 'c_d': stamps},
                             columns=['c_d', 'a'])

        class MyCParserWrapper(CParserWrapper):
            def _set_noconvert_columns(self):
                if self.usecols_dtype == 'integer':
                    # self.usecols is a set, which is documented as
                    # unordered; in practice a CPython set of integers is
                    # sorted, but other implementations make no such
                    # promise.  Reversing the order here simulates such an
                    # implementation, which before GH 17351 would cause
                    # the wrong columns to be converted via the
                    # parse_dates parameter.
                    flipped = list(self.usecols)
                    flipped.reverse()
                    self.usecols = flipped
                return CParserWrapper._set_noconvert_columns(self)

        class MyTextFileReader(TextFileReader):
            def __init__(self):
                # deliberately skips TextFileReader.__init__; only these
                # two attributes are set up here
                self._currow = 0
                self.squeeze = False

        reader = MyTextFileReader()
        reader.options = {
            'usecols': [0, 2, 3],
            'parse_dates': date_spec,
            'delimiter': ','
        }
        reader._engine = MyCParserWrapper(StringIO(raw), **reader.options)

        result = reader.read()
        tm.assert_frame_equal(result, expected)
示例#5
0
    def test_multiple_date_col_timestamp_parse(self):
        data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
        result = self.read_csv(StringIO(data), sep=',', header=None,
                               parse_dates=[[0, 1]], date_parser=Timestamp)

        ex_val = Timestamp('05/31/2012 15:30:00.029')
        assert result['0_1'][0] == ex_val
示例#6
0
    def test_index_groupby(self):
        # one index of each flavour: integer, float, object and datetime
        candidates = [
            Index(range(6)),
            Index(np.arange(0, 0.6, 0.1)),
            Index('A B C D E F'.split()),
            pd.date_range('2013-01-01', freq='M', periods=6),
        ]

        # Grouping keys are only read by ``groupby``, so they are built once
        # up front.  In both sets positions 2 and 3 hold a missing value
        # (NaN / NaT) and must not show up as a group.
        float_keys = np.array([1, 2, np.nan, np.nan, 2, 1])

        nov, dec = datetime(2011, 11, 1), datetime(2011, 12, 1)
        stamp_keys = Index([nov, dec, pd.NaT, pd.NaT, dec, nov],
                           tz='UTC').values

        for idx in candidates:
            outer, inner = idx[[0, 5]], idx[[1, 4]]
            tm.assert_dict_equal(idx.groupby(float_keys),
                                 {1.0: outer, 2.0: inner})

            outer, inner = idx[[0, 5]], idx[[1, 4]]
            by_stamp = {Timestamp('2011-11-01'): outer,
                        Timestamp('2011-12-01'): inner}
            tm.assert_dict_equal(idx.groupby(stamp_keys), by_stamp)
    def test_constructor_invalid(self):
        # Each of these inputs must be rejected with a TypeError.

        # a bare scalar
        with pytest.raises(TypeError):
            Float64Index(0.)

        # a list containing strings
        with pytest.raises(TypeError):
            Float64Index(['a', 'b', 0.])

        # a list of Timestamps
        with pytest.raises(TypeError):
            Float64Index([Timestamp('20130101')])
示例#8
0
    def test_usecols_with_parse_dates(self):
        # See gh-9755
        raw = """a,b,c,d,e
        0,1,20140101,0900,4
        0,1,20140102,1000,4"""
        date_spec = [[1, 2]]

        stamps = [Timestamp('2014-01-01 09:00:00'),
                  Timestamp('2014-01-02 10:00:00')]
        expected = DataFrame({'a': [0, 0], 'c_d': stamps},
                             columns=['c_d', 'a'])

        # same column selection, listed in two different orders
        for picked in ([0, 2, 3], [3, 0, 2]):
            result = self.read_csv(StringIO(raw),
                                   usecols=picked,
                                   parse_dates=date_spec)
            tm.assert_frame_equal(result, expected)

        # See gh-13604
        raw = """2008-02-07 09:40,1032.43
        2008-02-07 09:50,1042.54
        2008-02-07 10:00,1051.65
        """
        col_names = ['date', 'values']
        date_index = Index([Timestamp('2008-02-07 09:40'),
                            Timestamp('2008-02-07 09:50'),
                            Timestamp('2008-02-07 10:00')],
                           name='date')
        expected = DataFrame({'values': [1032.43, 1042.54, 1051.65]},
                             index=date_index)

        # usecols gets its own copy of the names list
        result = self.read_csv(StringIO(raw),
                               parse_dates=[0],
                               index_col=0,
                               usecols=col_names[:],
                               header=None,
                               names=col_names)
        tm.assert_frame_equal(result, expected)

        # See gh-14792
        raw = """a,b,c,d,e,f,g,h,i,j
        2016/09/21,1,1,2,3,4,5,6,7,8"""
        letters = list('abcdefghij')
        expected = DataFrame({
            'a': Timestamp('2016-09-21'),
            'b': [1], 'c': [1], 'd': [2], 'e': [3], 'f': [4],
            'g': [5], 'h': [6], 'i': [7], 'j': [8]
        }, columns=letters)

        result = self.read_csv(StringIO(raw),
                               usecols=letters,
                               parse_dates=[0])
        tm.assert_frame_equal(result, expected)

        # Same header with columns 'a' and 'b' combined: the expected
        # 'a_b' column holds the plain string '2016/09/21 1'.
        raw = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
        letters = list('abcdefghij')
        expected = DataFrame({
            'a_b': '2016/09/21 1',
            'c': [1], 'd': [2], 'e': [3], 'f': [4],
            'g': [5], 'h': [6], 'i': [7], 'j': [8]
        }, columns=['a_b'] + list('cdefghij'))

        result = self.read_csv(StringIO(raw),
                               usecols=letters,
                               parse_dates=[[0, 1]])
        tm.assert_frame_equal(result, expected)
示例#9
0
    def test_constructor_invalid(self):
        # Each of these inputs must be rejected with a TypeError.

        # a bare scalar
        with self.assertRaises(TypeError):
            Float64Index(0.)

        # a list containing strings
        with self.assertRaises(TypeError):
            Float64Index(['a', 'b', 0.])

        # a list of Timestamps
        with self.assertRaises(TypeError):
            Float64Index([Timestamp('20130101')])