Example #1
def slide8():
    # Assumes module-level: import numpy as np; import pandas as pd
    # from pandas import Series
    import pytz
    print(pytz.common_timezones[-5:])

    print('US/Eastern')
    tz = pytz.timezone('US/Eastern')
    print(tz)

    rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
    ts = Series(np.random.randn(len(rng)), index=rng)
    print(ts.index.tz)

    print('date_range utc')
    print(pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC'))

    print('tz_localize to UTC')
    ts_utc = ts.tz_localize('UTC')
    print(ts_utc)
    print(ts_utc.index)

    print('tz_convert to US/Eastern')
    print(ts_utc.tz_convert('US/Eastern'))

    print('tz_localize to US/Eastern')
    ts_eastern = ts.tz_localize('US/Eastern')
    print(ts_eastern.tz_convert('UTC'))

    print('tz_convert')
    print(ts_eastern.tz_convert('Europe/Berlin'))

    stamp = pd.Timestamp('2011-03-12 04:00')
    stamp_utc = stamp.tz_localize('utc')
    print('US/Eastern')
    print(stamp_utc.tz_convert('US/Eastern'))

    stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
    print('moscow')
    print(stamp_moscow)

    print('nanoseconds')
    print(stamp_utc.value)
    print(stamp_utc.tz_convert('US/Eastern').value)

    from pandas.tseries.offsets import Hour
    stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
    print(stamp)
    print('+hour')
    print(stamp + Hour())
    stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
    print('DST transition')
    print(stamp + 2 * Hour())

    print('between different time zones')
    rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
    ts = Series(np.random.randn(len(rng)), index=rng)
    print(ts)
    ts1 = ts[:7].tz_localize('Europe/London')
    ts2 = ts1[2:].tz_convert('Europe/Moscow')
    result = ts1 + ts2
    print(result.index)
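A hedged aside on the tz_localize / tz_convert pair used above: localizing attaches a zone to a naive timestamp without moving the wall clock, while converting keeps the instant fixed and changes the wall clock. A minimal sketch:

import pandas as pd

stamp = pd.Timestamp('2011-03-12 04:00')
aware = stamp.tz_localize('UTC')           # attach a zone; wall clock unchanged
eastern = aware.tz_convert('US/Eastern')   # same instant, new wall clock
assert aware.value == eastern.value        # epoch nanoseconds are identical
print(aware, '->', eastern)                # 04:00+00:00 -> 23:00-05:00 (previous day)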
Example #2
def send_report(id_list):
    # Build the 24-hour graph -------------------------------------
    try:
        make_graph('2T', Hour(24))
        for i in id_list:
            url = f"https://api.telegram.org/bot{token}/sendPhoto"
            files = {'photo': open('1.png', 'rb')}
            data = {'chat_id': i}
            session.post(url, files=files, data=data)

        with open('config.json', 'r') as f:
            jdata = json.load(f)

        with open('config.json', 'w') as f:
            jdata['report_today'] = True
            json.dump(jdata, f, indent=4)

    except Exception as e:
        syslog_to_csv('\n\nsend_report: sending failed\n\n')
        syslog_to_csv(e)
    # Build the daily report into DOC_report24.csv
    try:
        data_dict = {}
        report_file_name = 'DOC_report24.csv'
        if not os.path.exists(report_file_name):
            with open(report_file_name, 'w') as f:
                f.write(','.join([
                    'DateTime', 'ObjectConsuption', 'MachinesPower', 'Import',
                    'MWh'
                ]) + '\n')

        df = pd.read_csv(data_file,
                         parse_dates=['Дата Время'],   # 'Date Time' column
                         index_col=['Дата Время'])
        data_sample = df[df.index[-1] - Hour(24):]
        data_dict['DateTime'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        data_dict['ObjectConsuption'] = int(
            data_sample['Мощность завода'].mean())      # 'Plant power'
        data_dict['MachinesPower'] = int(
            data_sample['Сумм мощность ГПГУ'].mean())   # 'Total genset power'
        data_dict['Import'] = int(data_sample['MainsImport'].mean())
        data_dict['MWh'] = (data_sample['MWh'].iloc[-1] -
                            data_sample['MWh'].iloc[0])
        to_csv(
            report_file_name,
            ['DateTime', 'ObjectConsuption', 'MachinesPower', 'Import', 'MWh'],
            data_dict)
    except Exception as e:
        print('Failed to save the daily report to DOC_report24.csv')
        syslog_to_csv(
            '\n\nFailed to save the daily report to DOC_report24.csv\n\n'
        )
        syslog_to_csv(e)
Example #3
def date_parse(time):
    # Shift a naive "YYYY-mm-dd HH:MM:SS TZ" string carrying a US timezone
    # abbreviation to UTC+8 wall-clock time (EDT is UTC-4, so +12 h, etc.).
    # Assumes dateutil's parser and pandas' Hour offset are in scope.
    tz = time.split(' ')[2]
    dt = parser.parse(time)
    if tz == 'EDT':
        return dt + Hour(12)
    elif tz == 'EST':
        return dt + Hour(13)
    elif tz == 'PDT':
        return dt + Hour(15)
    elif tz == 'PST':
        return dt + Hour(16)
    else:
        print('Missing time zone: %s' % tz)
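A hedged usage sketch for date_parse: recent dateutil does not recognize the bare abbreviation, warns, and returns a naive datetime, which the fixed offset then shifts onto UTC+8 wall-clock time. Assumes date_parse and its imports are in scope:

from dateutil import parser
from pandas.tseries.offsets import Hour

print(date_parse('2020-06-01 09:30:00 EDT'))
# 2020-06-01 21:30:00 -- the same instant expressed in UTC+8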
Example #4
def create_flux_ts(thresh_file, bin_width, area):
    # start by loading threshold data

    bins = str(int(bin_width / 60)) + 'T'

    names = ['id', 'jul', 'RE', 'FE', 'timeOverThresh']
    skiprows = f.linesToSkip('data/thresh/' + thresh_file + '.thresh')
    df = pd.read_csv('data/thresh/' + thresh_file + '.thresh',
                     skiprows=skiprows,
                     names=names,
                     delim_whitespace=True)

    df['date/times'] = df['jul'] + df['RE']
    start = df['RE'][0] - 0.5
    df['date/times'] = pd.to_datetime(list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    flux_ts = pd.Series(data=df['timeOverThresh'], index=df.index)

    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))

    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400) -
                      offset_hours * 3600) // 60
    offset_seconds = int(bin_width / 2) + int(
        start * 86400) - offset_hours * 3600 - offset_minutes * 60
    offset = offset_hours * Hour() + offset_minutes * Minute(
    ) + offset_seconds * Second()

    flux_ts.index += offset

    return flux_ts
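The three-part offset decomposition above is just a fixed duration; a sketch showing it collapses to a single Timedelta (bin_width and start are hypothetical sample values):

import pandas as pd
from pandas.tseries.offsets import Hour, Minute, Second

bin_width, start = 600, 0.25                  # hypothetical: 10-min bins, 06:00 start
total = int(bin_width / 2) + int(start * 86400)
h, rem = divmod(total, 3600)
m, s = divmod(rem, 60)
t0 = pd.Timestamp('2020-01-01')
assert t0 + (h * Hour() + m * Minute() + s * Second()) == t0 + pd.Timedelta(seconds=total)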
Example #5
def DP(iteraciones):
    dias = []
    dia = 0
    for index, row in ts.iterrows():
        start_boundary = index
        end_boundary = index + Hour()
        time_count = pd.Timedelta('0 m')
        for _, raw_data in df.iterrows():
            start_time = raw_data['Tiempo Inicio']
            end_time = raw_data['Tiempo final']
            if end_time > start_boundary:
                if start_time < end_boundary:
                    if start_time <= start_boundary:
                        if end_time >= end_boundary:
                            time_count = time_count + (end_boundary -
                                                       start_boundary)
                        else:
                            time_count = time_count + (end_time - start_boundary)
                    else:
                        if end_time >= end_boundary:
                            time_count = time_count + (end_boundary -
                                                       start_time)
                        else:
                            time_count = time_count + (end_time - start_time)
        ts.at[index, 'minuto'] = time_count.seconds / 60
        print(ts)
        print(df)
        print("calcular", (ts.at[index, 'minuto']) / 100)
    for i in range(iteraciones):
        for k in range(len(pesimista)):
            h = plt.hist(np.random.triangular(optimista[k], probable[k],
                                              pesimista[k], 100000),
                         bins=200,
                         density=True)
            plt.show()
Example #6
def test_offset(self):
    stamp = pd.Timestamp('2018-03-25 01:30', tz='Europe/Paris')
    self.assertTrue(
        (stamp == pd.DatetimeIndex(['2018-03-25 01:30:00+01:00'])).all())
    self.assertTrue(
        (stamp + Hour() == pd.DatetimeIndex(['2018-03-25 03:30:00+02:00'])).all())
Example #7
def mergeDateWithTime(dateSeries, timeSeries):
    """date in Timestamp ####-##-##, time in ##:##"""
    templist = []
    for i in range(len(dateSeries)):
        temp = timeSeries[i].split(":")
        # Hour/Minute expect integer counts, so convert the split strings
        time = dateSeries[i] + Hour(int(temp[0])) + Minute(int(temp[1]))
        templist.append(time)
    return Series(templist)
Example #8
def test_combined_offset(self):
    expected = pd.DatetimeIndex(
        ['2000-01-01 00:00:00', '2000-01-01 06:30:00',
         '2000-01-01 13:00:00', '2000-01-01 19:30:00'])
    self.assertTrue((expected == pd.date_range('2000-01-01', '2000-01-02',
                                               freq='6h30min')).all())
    self.assertTrue((pd.date_range('2000-01-01', '2000-01-02',
                                   freq='6h30min') ==
                     pd.date_range('2000-01-01', '2000-01-02',
                                   freq=Hour(6) + Minute(30))).all())
Example #9
def test_simple_offset(self):
    expected = pd.DatetimeIndex(
        ['2000-01-01 00:00:00', '2000-01-01 06:00:00',
         '2000-01-01 12:00:00', '2000-01-01 18:00:00',
         '2000-01-02 00:00:00'])
    self.assertTrue((expected == pd.date_range('2000-01-01', '2000-01-02',
                                               freq='6h')).all())
    self.assertTrue((pd.date_range('2000-01-01', '2000-01-02', freq='6h') ==
                     pd.date_range('2000-01-01', '2000-01-02',
                                   freq=Hour(6))).all())
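A hedged note on the equalities asserted above: pandas normalizes both the frequency string and the composed offsets to the same Tick, which can be checked directly with to_offset:

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import Hour, Minute

assert to_offset('6h30min') == Hour(6) + Minute(30)   # both reduce to <390 * Minutes>
assert to_offset('6h') == Hour(6)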
Example #10
def slide7():
    # Assumes module-level: import numpy as np; import pandas as pd
    # from pandas import Series; from datetime import datetime
    from pandas.tseries.offsets import Hour, Minute
    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))

    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    print(ts.shift(2))
    print(ts.shift(-2))
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
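A hedged aside: the final groupby(offset.rollforward) trick bins each date to its month end; the more idiomatic spelling in current pandas is a month-end resample. A sketch:

import numpy as np
import pandas as pd

ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts.resample('M').mean())   # same monthly means as groupby(MonthEnd().rollforward)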
Example #11
class FromDictwithTimestamp:

    params = [Nano(1), Hour(1)]
    param_names = ["offset"]

    def setup(self, offset):
        N = 10**3
        idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N)
        df = DataFrame(np.random.randn(N, 10), index=idx)
        self.d = df.to_dict()

    def time_dict_with_timestamp_offsets(self, offset):
        DataFrame(self.d)
Example #12
class FromDictwithTimestamp(object):

    params = [Nano(1), Hour(1)]
    param_names = ['offset']

    def setup(self, offset):
        N = 10**3
        np.random.seed(1234)
        idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
        df = DataFrame(np.random.randn(N, 10), index=idx)
        self.d = df.to_dict()

    def time_dict_with_timestamp_offsets(self, offset):
        DataFrame(self.d)
Example #13
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    # creates a time series of flux data
    # returns time series object of flux
    # bin_width is time bin size in seconds, area is area of detector in square meters

    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    skiprows = f.linesToSkip(from_dir + thresh_file + '.thresh')
    df = pd.read_csv(from_dir + thresh_file + '.thresh',
                     skiprows=skiprows,
                     names=names,
                     delim_whitespace=True)

    # sort by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    df['date/times'] = pd.to_datetime(list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    # create time series, sample according to bin_width
    # calculate bins in pandas notation
    bins = str(int(bin_width / 60)) + 'T'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'

    # determine offset (basically the bin centers) and add to the index
    start = df['RE'][0] - 0.5
    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400) -
                      offset_hours * 3600) // 60
    offset_seconds = int(bin_width / 2) + int(
        start * 86400) - offset_hours * 3600 - offset_minutes * 60
    offset = offset_hours * Hour() + offset_minutes * Minute(
    ) + offset_seconds * Second()
    flux_ts.index += offset

    # filter out unfilled bins
    for i in range(len(flux_ts)):
        if i == 0 and (flux_ts[i] == 0 or flux_ts[i + 1] == 0):
            flux_ts[i] = np.nan
        if i > 0 and i < len(flux_ts) - 1 and (flux_ts[i - 1] == 0
                                               or flux_ts[i] == 0
                                               or flux_ts[i + 1] == 0):
            flux_ts[i] = np.nan
        if i == len(flux_ts) - 1 and (flux_ts[i - 1] == 0 or flux_ts[i] == 0):
            flux_ts[i] = np.nan

    flux_ts = flux_ts.interpolate()

    return flux_ts
Example #14
def to_offset(self) -> DateOffset:
    if self.value == "H":
        return Hour(1)
    elif self.value == "D":
        return Day(1)
    elif self.value == "W-MON":
        return Week(1, weekday=0)
    elif self.value == "MS":
        return MonthBegin(1)
    elif self.value == "QS-DEC":
        return QuarterBegin(startingMonth=10)
    elif self.value == "AS":
        return YearBegin(1)
    raise NotImplementedError(self.value)
Example #15
class TestFreq:
    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        # GH#20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH#20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one TimedeltaIndex shouldn't alter the freq
        #  for another that views the same data

        tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        tda = tdi._data

        tdi2 = TimedeltaIndex(tda)._with_freq(None)
        assert tdi2.freq is None

        # Original was not altered
        assert tdi.freq == "2D"
        assert tda.freq == "2D"
Example #16
class TestFreq:
    def test_freq_setter_errors(self):
        # GH#20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B",
                 BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH#20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same data

        dti = date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
Example #17
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Compute the next update time.
    Frequency codes:
        'S': advance to the next second
        'm': advance to the next minute
        'H': advance to the next hour
        'D': advance to the next day
        'W': advance to next Monday
        'M': advance to the first day of the next month
        'Q': advance to the first day of the next quarter
    The result is then adjusted to the given hour and minute.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    if freq == 'S':
        off = Second()
        return last_updated + off
    elif freq == 'm':
        off = Minute()
        return last_updated + off
    elif freq == 'H':
        off = Hour()
        return last_updated + off
    elif freq == 'D':
        d = BDay(n=1, normalize=True)
        res = last_updated + d
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'W':
        w = Week(normalize=True, weekday=0)
        res = last_updated + w
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'M':
        m = MonthBegin(n=1, normalize=True)
        res = last_updated + m
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'Q':
        q = QuarterBegin(normalize=True, startingMonth=1)
        res = last_updated + q
        return res.replace(hour=hour, minute=minute, second=second)
    else:
        raise TypeError('Unrecognized frequency type; accepted values: {}'.format(
            ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
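A hedged usage sketch (assumes next_update_time, its offsets, and MARKET_START are in scope, and that last_updated is a pandas Timestamp):

import pandas as pd

last = pd.Timestamp('2020-03-02 10:15:00')   # a Monday
print(next_update_time(last, freq='H'))      # 2020-03-02 11:15:00
print(next_update_time(last, freq='D'))      # 2020-03-03 18:00:00 (next business day)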
Example #18
    def create_position_info(self):
        """
        Create daily profit loss using stock data not using statement data
        :return: dict
        """
        last_close = self.stocks.last().close
        if self.date == self.position_set.stop_date and self.position_set.status == 'CLOSE':
            last_close = self.close_order.net_price

        stage = self.position_set.get_stage(last_close)
        status = self.position_set.current_status(
            new_price=last_close, old_price=self.stocks.reverse()[1].close
        )

        pl_open = (last_close - self.open_order.net_price) * self.open_order.quantity
        pl_open_pct = round(pl_open / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

        if self.date == self.start_date:
            pl_day = (last_close - self.open_order.net_price) * self.open_order.quantity
        elif self.date == self.stop_date:
            pl_day = (self.close_order.net_price - self.stocks.reverse()[1].close) * self.open_order.quantity
        else:
            pl_day = (last_close - self.stocks.reverse()[1].close) * self.open_order.quantity
        pl_day_pct = round(pl_day / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

        return dict(
            stage_id=stage.id,
            stage=stage.stage_name,
            status=status,
            pl_open=round(pl_open, 2),
            pl_open_pct=pl_open_pct,
            pl_day=round(pl_day, 2),
            pl_day_pct=pl_day_pct,
            enter_price=self.open_order.net_price,
            exit_price=self.close_order.net_price if self.close_order else 0.0,
            quantity=self.open_order.quantity,
            holding=self.open_order.net_price * self.open_order.quantity,
            bp_effect=self.position_instruments.last().bp_effect,
            # Timestamp.to_datetime() was removed from pandas; use to_pydatetime()
            date=(self.date + Hour(17) + Minute(30)).to_pydatetime().date(),
        )
Example #19
File: epf.py  Project: tgjeon/EPF2016
def dataPreparation(rawdata):
    # Cut data until 2016-04-22
    rawdata = rawdata[:DATA_FOR_EXPERIMENT]

    data = pd.DataFrame(columns=range(TRAINING_WINDOW_HOURS))
    label = pd.DataFrame(columns=range(PREDICTION_WINDOW_HOURS))

    idx = pd.date_range(start=TRAINING_DURATION_START,
                        periods=MOVING_WINDOW_SIZE,
                        freq='H')

    data_duration = (
        TEST_DURATION_END -
        datetime.timedelta(TRAINING_WINDOW_DAYS)) - TRAINING_DURATION_START

    for i in range(data_duration.days):
        expdata = rawdata.loc[idx]

        data_row = (expdata[:TRAINING_WINDOW_HOURS].values).transpose()
        label_row = (expdata[TRAINING_WINDOW_HOURS:].values).transpose()

        # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent
        data = pd.concat([data,
                          pd.DataFrame(data=data_row,
                                       columns=range(TRAINING_WINDOW_HOURS))],
                         ignore_index=True)
        label = pd.concat([label,
                           pd.DataFrame(data=label_row,
                                        columns=range(PREDICTION_WINDOW_HOURS))],
                          ignore_index=True)

        idx = idx + Hour(MOVING_WINDOW_STEP)

    # Split data into train and test data
    TRAINING_PERIODS = TRAINING_DURATION_END - TRAINING_DURATION_START

    train_data = data[:TRAINING_PERIODS.days].values.astype('float')
    train_label = label[:TRAINING_PERIODS.days].values.astype('float')

    test_data = data[TRAINING_PERIODS.days:].values.astype('float')
    test_label = label[TRAINING_PERIODS.days:].values.astype('float')

    return train_data, train_label, test_data, test_label
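A hedged note on the window advance above: adding Hour(n) to a DatetimeIndex shifts every element at once, which is what moves idx forward each iteration. A minimal sketch:

import pandas as pd
from pandas.tseries.offsets import Hour

idx = pd.date_range('2016-01-01', periods=3, freq='H')
print(idx + Hour(24))   # every timestamp shifted forward 24 hours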
Example #20

#----------------------------------------------------------------------
# Offset names ("time rules") and related functions


from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D'     : Day(),
    'B'     : BDay(),
    'H'     : Hour(),
    'T'     : Minute(),
    'S'     : Second(),
    'L'     : Milli(),
    'U'     : Micro(),
    None    : None,

    # Monthly - Calendar
    'M'      : MonthEnd(),
    'MS'     : MonthBegin(),

    # Monthly - Business
    'BM'     : BMonthEnd(),
    'BMS'    : BMonthBegin(),

    # Annual - Calendar
Example #21
    'W': 'W-SUN',
    'Q': 'Q-DEC',
    'A': 'A-DEC',  # YearEnd(month=12),
    'AS': 'AS-JAN',  # YearBegin(month=1),
    'BA': 'BA-DEC',  # BYearEnd(month=12),
    'BAS': 'BAS-JAN',  # BYearBegin(month=1),
    'Min': 'T',
    'min': 'T',
    'ms': 'L',
    'us': 'U',
    'ns': 'N'
}

_name_to_offset_map = {
    'days': Day(1),
    'hours': Hour(1),
    'minutes': Minute(1),
    'seconds': Second(1),
    'milliseconds': Milli(1),
    'microseconds': Micro(1),
    'nanoseconds': Nano(1)
}

_INVALID_FREQ_ERROR = "Invalid frequency: {0}"


def to_offset(freqstr):
    """
    Return DateOffset object from string representation or
    Timedelta object
Example #22
def create_data():
    """ create the pickle/msgpack data """

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
                                          names=['first', 'second']))

    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                                         names=['one',
                                                                'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k]
                                  for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                                                  names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A':
                     Categorical(['foo', 'bar', 'baz']),
                     'B':
                     np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
Example #23
class TestTimedeltaIndexOps:
    def test_value_counts_unique(self):
        # GH 7735
        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        obj = idx
        tm.assert_series_equal(obj.value_counts(), expected)

        obj = Series(idx)
        tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                pd.NaT,
            ]
        )

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        # GH 9512
        for idx in map(
            TimedeltaIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["00:01:00", "00:01:00", "00:02:00"],
                ["00:01:00", "00:01:00", "00:00:01"],
            ),
        ):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        # see gh-9680
        tdi = timedelta_range(start=0, periods=10, freq="1s")
        ts = Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__.keys()
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )

        idx2 = TimedeltaIndex(
            ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
        )

        for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 0, 4])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(expected, result)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH#11018
        idx = timedelta_range("1", freq=freq_sample, periods=10)
        result = TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_repeat(self):
        index = timedelta_range("1 days", periods=2, freq="D")
        exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex(
            [
                "1 days",
                "1 days",
                "1 days",
                "NaT",
                "NaT",
                "NaT",
                "3 days",
                "3 days",
                "3 days",
            ]
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        assert TimedeltaIndex._na_value is pd.NaT
        assert TimedeltaIndex([])._na_value is pd.NaT

        idx = TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one TimedeltaIndex shouldn't alter the freq
        #  for another that views the same data

        tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        tda = tdi._data

        tdi2 = TimedeltaIndex(tda)._with_freq(None)
        assert tdi2.freq is None

        # Original was not altered
        assert tdi.freq == "2D"
        assert tda.freq == "2D"
Example #24
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex([
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
        ])

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        tz = tz_naive_fixture
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01",
                                periods=30,
                                freq=freq,
                                tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1,
                                                        len(idx) + 1)),
                            tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00",
                                freq="-1H",
                                periods=10,
                                tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:

            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00",
                                 freq="H",
                                 periods=10,
                                 tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"],
                                tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"],
                          freq="D",
                          name="idx"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer,
                                    np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [
                    "2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02",
                    "2011-01-01"
                ],
                [
                    "2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03",
                    "2011-01-05"
                ],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates,
                                tz_naive_fixture):
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True,
                                             ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        # GH 10115
        idx = pd.date_range("2011-01-01",
                            freq=freq_sample,
                            periods=10,
                            name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(
                ([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(
                ([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        # to check Index/Series compat
        idx = pd.date_range("2011-01-01",
                            freq=freq_sample,
                            periods=10,
                            name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00",
                            freq=freq_sample,
                            periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1],
                                                            dtype=np.intp))

    def test_equals(self):
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"],
                             tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B",
                 BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = ("Inferred frequency 2D from passed values does not conform to "
               "passed frequency 5D")
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same data

        dti = pd.date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
Example #25
class TestDatetimeIndexOps:
    def test_ops_properties_basic(self, datetime_series):

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series({"year": 2000, "month": 1, "day": 10})
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, request, tz_naive_fixture, freq, expected):
        tz = tz_naive_fixture
        if freq == "A" and not IS64 and isinstance(tz, tzlocal):
            request.node.add_marker(
                pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
            )

        idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH 7735
        idx = date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:

            tm.assert_series_equal(obj.value_counts(), expected)

        expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        #  for another that views the same data

        dti = date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
Example #26
def test_Hour():
    assert_offset_equal(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1))
    assert_offset_equal(Hour(-1), datetime(2010, 1, 1, 1),
                        datetime(2010, 1, 1))
    assert_offset_equal(2 * Hour(), datetime(2010, 1, 1),
                        datetime(2010, 1, 1, 2))
    assert_offset_equal(-1 * Hour(), datetime(2010, 1, 1, 1),
                        datetime(2010, 1, 1))

    assert Hour(3) + Hour(2) == Hour(5)
    assert Hour(3) - Hour(2) == Hour()

    assert Hour(4) != Hour(1)
Example #27
def test_Day_equals_24_Hours():
    ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')
    result = ts + Day(1)
    expected = ts + Hour(24)
    assert result == expected
Example #28
index = pd.date_range(start='20120401', periods=25)
index = pd.date_range(end='20120601', periods=20)
'''
The start and end dates define strict boundaries for the date index. To
generate an index made up of the last business day of each month, pass the
'BM' frequency (business end of month).
'''
index = pd.date_range('1/1/2000', '12/1/2000', freq='BM')

# Frequencies and date offsets
'''
Frequencies in pandas are composed of a base frequency and a multiplier. The
base frequency is usually referred to by a string alias such as 'M' or 'H',
and each base frequency has a corresponding object known as a date offset.
'''
from pandas.tseries.offsets import Hour, Minute

hour = Hour()
four_hours = Hour(4)
# These objects rarely need to be created explicitly; string aliases such as 'H' or '4H' suffice
index = pd.date_range('20000105', '20000106', freq='4H')
# Most offset objects can be combined through addition
interval = Hour(2) + Minute(25)
# Likewise, a frequency string such as '2h30min' can be passed and is parsed into the equivalent offset expression
index = pd.date_range('20000105', '20000106', freq='4h15min')
# WOM (week of month) dates are a very useful frequency class; aliases start with WOM and yield dates like the third Friday of each month
rng = pd.date_range('20000105', '20010106', freq='WOM-3MON')
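# e.g. list(rng[:3]) gives the third Mondays 2000-01-17, 2000-02-21 and 2000-03-20 (illustrative)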

# Shifting (leading and lagging) data
'''
Shifting refers to moving data backward or forward along the time axis. Both
Series and DataFrame have a shift method that performs a naive shift forward
or backward, leaving the index unmodified.
'''
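
# A minimal sketch of the shift behavior described above (values are
# illustrative; assumes numpy/pandas are imported at the top of this file):
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('20000131', periods=4, freq='M'))
ts.shift(2)            # values move forward two periods; leading entries become NaN
ts.shift(-2)           # values move backward two periods
ts.shift(2, freq='M')  # shifts the timestamps instead, leaving the values aligned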
Example #29
File: temp.py  Project: liudengfeng/cnswd
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """前次更新后下一次更新时间

    Arguments:
        last_time {obj} -- 上次时间

    Keyword Arguments:
        freq {str} -- 更新周期 (default: {'D'})
        num {int} -- 日级别以下为单位数,以上为小时数 (default: {9})
        is_end {bool} -- 是否为周期尾部 (default: {False})

    Raises:
        TypeError: 不能识别的周期类型

    Returns:
        Timestamp -- 下一次更新时间

    Notes:
        一、 freq < D
            `num`代表周期数
            上一时点`normalize`后移动`num`周期,不考虑开始及结束问题
        二、 freq in D、B
            `num`代表小时
            对于历史时间,上一时点`normalize`后一律移动到下一个周期,且将小时调整到指定的num
            如上一时点其日期为当前日期,且在其`normalize`及调整小时后的值晚于上一时点,则取调整后的值
        三、 freq > D 开始及结束才有效
            `num`无效
            如周初、周末、月初、月末、季初、季末、年初、年末
            此时num数字不起作用
    """
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'Y', 'H', 'MIN')
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    assert isinstance(
        last_time, pd.Timestamp), f'Type error: expected Timestamp, got {type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, 'last_time must not be later than the current time'
    freq = freq.upper()
    if freq == 'MIN':
        offset = Minute(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'H':
        offset = Hour(n=num)
        return offset.apply(last_time.floor(freq))
    if freq == 'D':
        # the hour of day must be taken into account here
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            return limit
        else:
            offset = Day()
            return offset.apply(last_time.floor(freq)).replace(hour=num)
    if freq == 'B':
        offset = BDay()
        # business day
        if last_time.weekday() in range(0, 5):
            # the hour of day must be taken into account here
            limit = last_time.normalize().replace(hour=num)
            if last_time < limit:
                return limit
            else:
                return offset.apply(last_time.normalize()).replace(hour=num)
        else:
            return offset.apply(last_time.normalize()).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            return nw + pd.Timedelta(days=7-nw.weekday()) - pd.Timedelta(nanoseconds=1)
        else:
            return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            offset = MonthEnd(n=2)
            res = offset.apply(last_time.normalize())
            if last_time.is_month_end:
                res = offset.rollback(res)
            return res
        else:
            offset = MonthBegin()
            return offset.apply(last_time.normalize())
    if freq == 'Q':
        if is_end:
            offset = QuarterEnd(n=2, startingMonth=3, normalize=True)
            res = offset.apply(last_time)
            if last_time.is_quarter_end:
                offset = QuarterEnd(n=-1, startingMonth=3, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = QuarterBegin(n=1, normalize=True, startingMonth=1)
            return offset.apply(last_time)
    if freq == 'Y':
        if last_time.year == now.year:
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            else:
                return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            offset = YearEnd(normalize=True, month=12, n=2)
            res = offset.apply(last_time)
            if last_time.is_year_end:
                offset = YearEnd(n=-1, month=12, normalize=True)
                res = offset.apply(res)
            return res
        else:
            offset = YearBegin(normalize=True, month=1, n=1)
            return offset.apply(last_time)
    raise ValueError('Unrecognized period type; only {} are accepted, got {}'.format(
        valid_freq, freq))
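
# A minimal usage sketch of the scheduling helper above (timestamps are
# illustrative; assumes pandas is imported as pd at the top of this file):
# >>> last = pd.Timestamp('2020-03-02 10:00')
# >>> time_for_next_update(last, freq='H', num=2)        # two hours after flooring to the hour
# >>> time_for_next_update(last, freq='D', num=9)        # the next day at 09:00
# >>> time_for_next_update(last, freq='M', is_end=True)  # the end of the following month
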
def create_data():
    """ create the pickle/msgpack data """

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(
        tuple(
            zip(*[
                ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                ["one", "two", "one", "two", "one", "two", "one", "two"],
            ])),
        names=["first", "second"],
    ))

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(np.arange(10).astype(np.int64),
                  index=date_range("20130101", periods=10)),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                         names=["one", "two"]),
        ),
        dup=Series(np.arange(5).astype(np.float64),
                   index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        int=DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        mixed=DataFrame({k: data[k]
                         for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                      columns=["A", "B", "A"]),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
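
# A minimal sketch of writing the generated fixtures to disk (the file name is
# an illustrative assumption; the original project may handle this elsewhere):
if __name__ == "__main__":
    import pickle

    with open("legacy_fixtures.pickle", "wb") as fh:
        pickle.dump(create_data(), fh)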