def slide8():
    """Print a walkthrough of pandas time-zone handling.

    Covers localizing and converting a Series index, localizing and
    converting single Timestamps, adding ``Hour`` offsets to tz-aware
    stamps, and adding two Series that carry different time zones.

    Relies on the module-level names ``pd``, ``np`` and ``Series``.
    Returns nothing; every result is printed.
    """
    # Every print passes exactly one argument, so the call form prints the
    # same text on Python 2 and Python 3 (the statement form only parses on 2).
    import pytz

    print(pytz.common_timezones[-5:])
    print('US/Eastern')
    tz = pytz.timezone('US/Eastern')
    print(tz)

    # A naive daily series: its index has no time zone yet.
    rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
    ts = Series(np.random.randn(len(rng)), index=rng)
    print(ts.index.tz)
    print('date_range utc')
    print(pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC'))

    print('tz_localize to UTC')
    ts_utc = ts.tz_localize('UTC')
    print(ts_utc)
    print(ts_utc.index)
    print('tz_convert to us/Eastern')
    print(ts_utc.tz_convert('US/Eastern'))
    print('tz_localize to us/Eastern')
    ts_eastern = ts.tz_localize('US/Eastern')
    print(ts_eastern.tz_convert('UTC'))
    print('tz_convert')
    print(ts_eastern.tz_convert('Europe/Berlin'))

    # Single timestamps.
    stamp = pd.Timestamp('2011-03-12 04:00')
    stamp_utc = stamp.tz_localize('utc')
    print('us/eastern')
    print(stamp_utc.tz_convert('US/Eastern'))
    stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
    print('moscow')
    print(stamp_moscow)
    print('nano seconds')
    print(stamp_utc.value)
    print(stamp_utc.tz_convert('US/Eastern').value)

    # Offset arithmetic on tz-aware timestamps.
    from pandas.tseries.offsets import Hour
    stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
    print(stamp)
    print('+hour')
    print(stamp + Hour())
    stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
    print('summer time')
    print(stamp + 2 * Hour())

    # Adding two series localized to different zones.
    print('between different time zones')
    rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
    ts = Series(np.random.randn(len(rng)), index=rng)
    print(ts)
    ts1 = ts[:7].tz_localize('Europe/London')
    ts2 = ts1[2:].tz_convert('Europe/Moscow')
    result = ts1 + ts2
    print(result.index)
def send_report(id_list):
    """Send the 24-hour graph to each chat and append the daily report row.

    Step 1 calls ``make_graph('2T', Hour(24))``, posts ``1.png`` to the
    Telegram ``sendPhoto`` endpoint once per entry of ``id_list`` and then
    sets ``report_today`` to True in ``config.json``.

    Step 2 reads ``data_file``, takes the rows of the last 24 hours
    (relative to the last index value) and passes their averages and the
    ``MWh`` difference to ``to_csv`` for ``DOC_report24.csv``.

    Each step runs in its own ``try`` block; a failure is reported through
    ``syslog_to_csv`` and does not propagate to the caller.

    Relies on module-level ``token``, ``session``, ``data_file``,
    ``make_graph``, ``syslog_to_csv`` and ``to_csv``.

    :param id_list: iterable of Telegram chat ids.
    """
    # Build the 24-hour graph and send it -------------------------------------
    try:
        make_graph('2T', Hour(24))
        url = f"https://api.telegram.org/bot{token}/sendPhoto"
        for i in id_list:
            # Re-open per request so every upload starts at byte 0 and the
            # file handle is always closed (the original leaked one per chat).
            with open('1.png', 'rb') as photo:
                session.post(url, files={'photo': photo}, data={'chat_id': i})
        with open('config.json', 'r') as f:
            jdata = json.load(f)
        # Update in memory first so the file is only truncated once the new
        # content is ready to be written.
        jdata['report_today'] = True
        with open('config.json', 'w') as f:
            json.dump(jdata, f, indent=4)
    except Exception as e:
        syslog_to_csv('\n\nНеудачная отправка send_report\n\n')
        syslog_to_csv(e)

    # Append the daily report to DOC_report24.csv
    try:
        data_dict = {}
        report_file_name = 'DOC_report24.csv'
        columns = [
            'DateTime', 'ObjectConsuption', 'MachinesPower', 'Import', 'MWh'
        ]
        if not os.path.exists(report_file_name):
            # First run: create the file with its header row.
            with open(report_file_name, 'w') as f:
                f.write(','.join(columns) + '\n')
        df = pd.read_csv(data_file,
                         parse_dates=['Дата Время'],
                         index_col=['Дата Время'])
        data_sample = df[df.index[-1] - Hour(24):]
        data_dict['DateTime'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M')
        data_dict['ObjectConsuption'] = int(
            data_sample['Мощность завода'].mean())
        data_dict['MachinesPower'] = int(
            data_sample['Сумм мощность ГПГУ'].mean())
        data_dict['Import'] = int(data_sample['MainsImport'].mean())
        # .iloc makes the positional access explicit; integer keys on a
        # datetime-indexed Series are deprecated as positions.
        mwh = data_sample['MWh']
        data_dict['MWh'] = mwh.iloc[-1] - mwh.iloc[0]
        to_csv(report_file_name, columns, data_dict)
    except Exception as e:
        print('Неудачное сохранение суточного отчёта в файл DOC_report24.csv')
        syslog_to_csv(
            '\n\nНеудачное сохранение суточного отчёта в файл DOC_report24.csv\n\n'
        )
        syslog_to_csv(e)
def date_parse(time):
    """Parse ``time`` and shift it by a fixed number of hours per zone code.

    The zone abbreviation is taken from the third space-separated field of
    ``time``. Recognized abbreviations and the hours added to the parsed
    value: EDT +12, EST +13, PDT +15, PST +16.
    NOTE(review): these shifts look like a conversion to UTC+8 - confirm.

    :param time: date string whose third space-separated token is the zone.
    :return: the shifted datetime, or None (after printing a message) when
        the abbreviation is not recognized.
    """
    zone = time.split(' ')[2]
    parsed = parser.parse(time)

    shift_hours = (('EDT', 12), ('EST', 13), ('PDT', 15), ('PST', 16))
    for code, hours in shift_hours:
        if zone == code:
            return parsed + Hour(hours)

    print('缺少时区:%s' % zone)
    return None
def create_flux_ts(thresh_file, bin_width, area):
    """Build a binned flux time series from a ``.thresh`` file.

    Reads ``data/thresh/<thresh_file>.thresh``, counts the rows that fall in
    each bin of ``bin_width`` seconds, scales the counts by
    ``1 / ((bin_width / 60) * area)`` and shifts the index by half a bin
    plus the offset derived from the first ``RE`` value.

    :param thresh_file: file name without directory or ``.thresh`` suffix.
    :param bin_width: bin size in seconds (converted to whole minutes for
        resampling).
    :param area: divisor applied together with the bin length in minutes.
    :return: the resampled, scaled and shifted ``pandas.Series``.
    """
    path = 'data/thresh/' + thresh_file + '.thresh'

    # start by loading threshold data
    # 'min' is the non-deprecated spelling of the minute alias 'T'.
    bins = str(int(bin_width / 60)) + 'min'
    names = ['id', 'jul', 'RE', 'FE', 'timeOverThresh']
    skiprows = f.linesToSkip(path)
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which newer pandas releases deprecate.
    df = pd.read_csv(path, skiprows=skiprows, names=names, sep=r'\s+')

    df['date/times'] = df['jul'] + df['RE']
    start = df['RE'].iloc[0] - 0.5
    # Materialize the map: on Python 3 it is a one-shot iterator, which
    # pd.to_datetime does not accept as list-like input.
    df['date/times'] = pd.to_datetime(
        list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    flux_ts = pd.Series(data=df['timeOverThresh'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))

    # Split "half a bin + start offset" (in seconds) into h / m / s parts.
    shift_seconds = int(bin_width / 2) + int(start * 86400)
    offset_hours = shift_seconds // 3600
    offset_minutes = (shift_seconds - offset_hours * 3600) // 60
    offset_seconds = shift_seconds - offset_hours * 3600 - offset_minutes * 60
    offset = (offset_hours * Hour() + offset_minutes * Minute() +
              offset_seconds * Second())
    flux_ts.index += offset

    # The original fell off the end and returned None, discarding the result.
    return flux_ts
def DP(iteraciones):
    """Fill ``ts['minuto']`` with per-hour overlap minutes, then plot samples.

    For every row label of the module-level frame ``ts`` a one-hour window
    ``[index, index + Hour())`` is built and the overlap of every interval in
    the module-level frame ``df`` (columns ``'Tiempo Inicio'`` /
    ``'Tiempo final'``) with that window is summed. The total, in minutes,
    is stored in ``ts.at[index, 'minuto']``.

    Afterwards, ``iteraciones`` times, a histogram of 100000 triangular
    samples is drawn for each position of the module-level sequences
    ``optimista`` / ``probable`` / ``pesimista``.

    :param iteraciones: number of plotting passes.
    """
    for index, _row in ts.iterrows():
        start_boundary = index
        end_boundary = index + Hour()
        time_count = pd.Timedelta('0 m')
        for _, raw_data in df.iterrows():
            start_time = raw_data['Tiempo Inicio']
            end_time = raw_data['Tiempo final']
            # Only intervals that intersect the window contribute; the
            # overlap is the interval clipped to the window. The original
            # spelled this as four branches, one of which added to the
            # undefined name `time` instead of `time_count`.
            if end_time > start_boundary and start_time < end_boundary:
                time_count = time_count + (min(end_time, end_boundary) -
                                           max(start_time, start_boundary))
        # total_seconds() keeps whole days; `.seconds` would wrap at 24 h.
        ts.at[index, 'minuto'] = time_count.total_seconds() / 60

    # NOTE(review): the source layout does not show whether these prints ran
    # once or per row; they are emitted once, after the table is filled.
    print(ts)
    print(df)
    print("calcular", (ts.at[index, 'minuto']) / 100)

    for _ in range(iteraciones):
        for k in range(len(pesimista)):
            plt.hist(np.random.triangular(optimista[k], probable[k],
                                          pesimista[k], 100000),
                     bins=200,
                     density=True)
            plt.show()
def test_offset(self):
    """Adding ``Hour()`` to 01:30+01:00 Europe/Paris gives 03:30+02:00."""
    paris_stamp = pd.Timestamp('2018-03-25 01:30', tz='Europe/Paris')

    # The localized stamp equals the same wall time written with +01:00.
    same_instant = pd.DatetimeIndex(['2018-03-25 01:30:00+01:00'])
    self.assertTrue((paris_stamp == same_instant).all())

    # One hour later the UTC offset in the expected value is +02:00.
    one_hour_later = pd.DatetimeIndex(['2018-03-25 03:30:00+02:00'])
    self.assertTrue((paris_stamp + Hour() == one_hour_later).all())
def mergeDateWithTime(dateSeries, timeSeries):
    """Combine dates with 'HH:MM' time strings into full timestamps.

    :param dateSeries: sequence of Timestamps (date part, ``####-##-##``).
    :param timeSeries: sequence of strings split on ``":"``; the first field
        is the hour and the second the minute.
    :return: a new ``Series`` (default integer index) of
        ``date + Hour(h) + Minute(m)``.

    Elements are paired by position, so inputs with a non-default index work
    too. The hour and minute fields are converted with ``int`` before being
    handed to the offsets, because the offset classes reject string counts.
    """
    merged = []
    for day, clock in zip(dateSeries, timeSeries):
        fields = clock.split(":")
        merged.append(day + Hour(int(fields[0])) + Minute(int(fields[1])))
    return Series(merged)
def test_combined_offset(self):
    """'6h30min' and ``Hour(6) + Minute(30)`` describe the same date range."""
    from_alias = pd.date_range('2000-01-01', '2000-01-02', freq='6h30min')

    # The alias form yields four stamps, 6.5 hours apart.
    wanted = pd.DatetimeIndex([
        '2000-01-01 00:00:00',
        '2000-01-01 06:30:00',
        '2000-01-01 13:00:00',
        '2000-01-01 19:30:00',
    ])
    self.assertTrue((wanted == from_alias).all())

    # Summing offset objects must match the alias form.
    from_objects = pd.date_range('2000-01-01', '2000-01-02',
                                 freq=Hour(6) + Minute(30))
    self.assertTrue((from_alias == from_objects).all())
def test_simple_offset(self):
    """'6h' and ``Hour(6)`` describe the same date range."""
    from_alias = pd.date_range('2000-01-01', '2000-01-02', freq='6h')

    # Both endpoints are included, giving five stamps.
    wanted = pd.DatetimeIndex([
        '2000-01-01 00:00:00',
        '2000-01-01 06:00:00',
        '2000-01-01 12:00:00',
        '2000-01-01 18:00:00',
        '2000-01-02 00:00:00',
    ])
    self.assertTrue((wanted == from_alias).all())

    # The offset object must match the alias form.
    from_object = pd.date_range('2000-01-01', '2000-01-02', freq=Hour(6))
    self.assertTrue((from_alias == from_object).all())
def slide7():
    """Print a walkthrough of pandas date offsets and shifting.

    Covers ``Hour`` / ``Minute`` offsets and combined frequency strings,
    ``Series.shift`` with and without ``freq``, adding ``Day`` / ``MonthEnd``
    to a datetime, ``rollforward`` / ``rollback`` and grouping by
    ``rollforward``.

    Relies on the module-level names ``pd``, ``np``, ``Series`` and
    ``datetime``. Returns nothing; every result is printed.
    """
    # Every print passes exactly one argument, so the call form prints the
    # same text on Python 2 and Python 3 (the statement form only parses on 2).
    from pandas.tseries.offsets import Hour, Minute

    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))
    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    # Shifting values versus shifting the index.
    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    print(ts.shift(2))
    print(ts.shift(-2))
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))
    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    # Group every observation under the month end it rolls forward to.
    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
class FromDictwithTimestamp:
    """Benchmark: rebuild a DataFrame from a dict keyed by Timestamps.

    ``setup`` stores the ``to_dict()`` form of a 1000 x 10 random frame whose
    index is a date range with the parametrized offset as frequency; the
    timed method passes that dict back to ``DataFrame``.
    """

    params = [Nano(1), Hour(1)]
    param_names = ["offset"]

    def setup(self, offset):
        """Prepare ``self.d`` for the given index frequency ``offset``."""
        n_rows = 10**3
        stamps = date_range(Timestamp("1/1/1900"), freq=offset, periods=n_rows)
        values = np.random.randn(n_rows, 10)
        self.d = DataFrame(values, index=stamps).to_dict()

    def time_dict_with_timestamp_offsets(self, offset):
        """Timed body: construct a DataFrame from the prepared dict."""
        DataFrame(self.d)
class FromDictwithTimestamp(object):
    """Benchmark: rebuild a DataFrame from a dict keyed by Timestamps.

    ``setup`` seeds NumPy's global RNG with 1234 and stores the ``to_dict()``
    form of a 1000 x 10 random frame whose index is a date range with the
    parametrized offset as frequency; the timed method passes that dict back
    to ``DataFrame``.
    """

    params = [Nano(1), Hour(1)]
    param_names = ['offset']

    def setup(self, offset):
        """Prepare ``self.d`` for the given index frequency ``offset``."""
        n_rows = 10**3
        np.random.seed(1234)
        stamps = date_range(Timestamp('1/1/1900'), freq=offset, periods=n_rows)
        values = np.random.randn(n_rows, 10)
        self.d = DataFrame(values, index=stamps).to_dict()

    def time_dict_with_timestamp_offsets(self, offset):
        """Timed body: construct a DataFrame from the prepared dict."""
        DataFrame(self.d)
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    """Create a time series of flux data from a ``.thresh`` file.

    Reads ``<from_dir><thresh_file>.thresh``, counts the rows that fall in
    each bin, scales the counts by ``1 / ((bin_width / 60) * area)``, moves
    the index to the bin centers, blanks bins that are empty or adjacent to
    an empty bin and fills them by interpolation.

    :param thresh_file: file name without directory or ``.thresh`` suffix.
    :param bin_width: time bin size in seconds.
    :param area: area of the detector in square meters.
    :param from_dir: directory prefix of the threshold file.
    :return: ``pandas.Series`` named ``'FLUX'``.
    """
    path = from_dir + thresh_file + '.thresh'

    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    skiprows = f.linesToSkip(path)
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which newer pandas releases deprecate.
    df = pd.read_csv(path, skiprows=skiprows, names=names, sep=r'\s+')

    # index by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    # Materialize the map: on Python 3 it is a one-shot iterator, which
    # pd.to_datetime does not accept as list-like input.
    df['date/times'] = pd.to_datetime(
        list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    # create time series, sample according to bin_width
    # ('min' is the non-deprecated spelling of the minute alias 'T')
    bins = str(int(bin_width / 60)) + 'min'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'

    # determine offset (basically the bin centers) and add to the index
    # .iloc: the index is datetime-based here, so a bare [0] would rely on
    # the deprecated integer-as-position fallback.
    start = df['RE'].iloc[0] - 0.5
    shift_seconds = int(bin_width / 2) + int(start * 86400)
    offset_hours = shift_seconds // 3600
    offset_minutes = (shift_seconds - offset_hours * 3600) // 60
    offset_seconds = shift_seconds - offset_hours * 3600 - offset_minutes * 60
    offset = (offset_hours * Hour() + offset_minutes * Minute() +
              offset_seconds * Second())
    flux_ts.index += offset

    # filter out unfilled bins: an empty bin and both of its neighbours.
    # The mask is computed from the untouched values; the previous in-place
    # loop had already overwritten bin i-1 with NaN before testing it, so the
    # bin *after* an empty one was never blanked, and a one-bin series raised
    # IndexError.
    is_empty = (flux_ts == 0).values
    unfilled = is_empty.copy()
    unfilled[1:] |= is_empty[:-1]
    unfilled[:-1] |= is_empty[1:]
    flux_ts[unfilled] = float('nan')

    flux_ts = flux_ts.interpolate()
    return flux_ts
def to_offset(self) -> DateOffset:
    """Return the pandas offset that corresponds to ``self.value``.

    Supported codes: "H", "D", "W-MON", "MS", "QS-DEC" and "AS". Each call
    builds a fresh offset object.
    NOTE(review): "QS-DEC" is mapped to ``QuarterBegin(startingMonth=10)``;
    confirm this is the intended quarter anchoring.

    :raises NotImplementedError: for any other value (the value is passed as
        the exception argument).
    """
    # (code, factory) pairs, tried in order; factories defer construction
    # until a code matches.
    builders = (
        ("H", lambda: Hour(1)),
        ("D", lambda: Day(1)),
        ("W-MON", lambda: Week(1, weekday=0)),
        ("MS", lambda: MonthBegin(1)),
        ("QS-DEC", lambda: QuarterBegin(startingMonth=10)),
        ("AS", lambda: YearBegin(1)),
    )
    for code, build in builders:
        if self.value == code:
            return build()
    raise NotImplementedError(self.value)
class TestFreq:
    """Tests for assigning ``freq`` on the array behind a TimedeltaIndex."""

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """A compatible freq can be set (string or offset) and cleared."""
        # GH#20678
        tdi = TimedeltaIndex(values)

        # strings are converted to an offset on assignment
        tdi._data.freq = freq
        assert tdi.freq == freq
        assert isinstance(tdi.freq, DateOffset)

        # assigning None clears it again
        tdi._data.freq = None
        assert tdi.freq is None

    def test_freq_setter_errors(self):
        """Incompatible, non-fixed and unparsable freqs raise ValueError."""
        # GH#20678
        tdi = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # a freq that disagrees with the values
        mismatch_msg = ("Inferred frequency 2D from passed values does not conform to "
                        "passed frequency 5D")
        with pytest.raises(ValueError, match=mismatch_msg):
            tdi._data.freq = "5D"

        # a non-fixed frequency
        non_fixed_msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=non_fixed_msg):
            tdi._data.freq = "2B"

        # a string that is not a frequency at all
        with pytest.raises(ValueError, match="Invalid frequency"):
            tdi._data.freq = "foo"

    def test_freq_view_safe(self):
        """Clearing freq on one index leaves an index sharing its data alone."""
        original = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        shared = original._data

        stripped = TimedeltaIndex(shared)._with_freq(None)
        assert stripped.freq is None

        # neither the original index nor its array changed
        assert original.freq == "2D"
        assert shared.freq == "2D"
class TestFreq:
    """Tests for assigning ``freq`` on the array behind a DatetimeIndex."""

    def test_freq_setter_errors(self):
        """Incompatible and unparsable freqs raise ValueError."""
        # GH#20678
        dti = DatetimeIndex(["20180101", "20180103", "20180105"])

        # a freq that disagrees with the values
        mismatch_msg = ("Inferred frequency 2D from passed values does not conform to "
                        "passed frequency 5D")
        with pytest.raises(ValueError, match=mismatch_msg):
            dti._data.freq = "5D"

        # a string that is not a frequency at all
        with pytest.raises(ValueError, match="Invalid frequency"):
            dti._data.freq = "foo"

    @pytest.mark.parametrize("values",
                             [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize(
        "freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """A compatible freq can be set (string or offset) and cleared."""
        # GH#20678
        dti = DatetimeIndex(values, tz=tz)

        # strings are converted to an offset on assignment
        dti._data.freq = freq
        assert dti.freq == freq
        assert isinstance(dti.freq, DateOffset)

        # assigning None clears it again
        dti._data.freq = None
        assert dti.freq is None

    def test_freq_view_safe(self):
        """Clearing freq on one index leaves an index sharing its data alone."""
        original = date_range("2016-01-01", periods=5)
        shared = original._data

        stripped = DatetimeIndex(shared)._with_freq(None)
        assert stripped.freq is None

        # neither the original index nor its array changed
        assert original.freq == "D"
        assert shared.freq == "D"
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Compute the next update time after ``last_updated``.

    Frequency codes:
        'S': one second later
        'm': one minute later
        'H': one hour later
        'D': next business day (``BDay``)
        'W': next Monday
        'M': first day of the next month
        'Q': first day of the next quarter (quarters anchored on January)

    For 'D', 'W', 'M' and 'Q' the date is normalized and the time of day is
    then set to the given ``hour``, ``minute`` and ``second``; for 'S', 'm'
    and 'H' those three arguments are ignored.

    Returns ``MARKET_START`` when ``last_updated`` is null.

    :raises TypeError: if ``freq`` is not one of the codes above.
    """
    if pd.isnull(last_updated):
        return MARKET_START

    # Fixed-length steps: add a single unit and keep the time of day.
    tick_steps = (('S', Second), ('m', Minute), ('H', Hour))
    for code, tick in tick_steps:
        if freq == code:
            return last_updated + tick()

    # Calendar steps: jump to the next anchor date, then pin the clock time.
    calendar_steps = (
        ('D', lambda: BDay(n=1, normalize=True)),
        ('W', lambda: Week(normalize=True, weekday=0)),
        ('M', lambda: MonthBegin(n=1, normalize=True)),
        ('Q', lambda: QuarterBegin(normalize=True, startingMonth=1)),
    )
    for code, make_offset in calendar_steps:
        if freq == code:
            moved = last_updated + make_offset()
            return moved.replace(hour=hour, minute=minute, second=second)

    raise TypeError('不能识别的周期类型,仅接受{}'.format(
        ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
def create_position_info(self):
    """
    Create daily profit loss using stock data
    not using statement data

    The reference price is the close of ``self.stocks.last()``, replaced by
    the close order's net price when ``self.date`` equals the position set's
    stop date and its status is ``'CLOSE'``. The previous close is the second
    entry of ``self.stocks.reverse()``.

    NOTE(review): the first check uses ``self.position_set.stop_date`` while
    the day P/L branch uses ``self.stop_date`` - confirm both are intended.

    :return: dict with the keys stage_id, stage, status, pl_open,
        pl_open_pct, pl_day, pl_day_pct, enter_price, exit_price, quantity,
        holding, bp_effect and date.
    """
    last_close = self.stocks.last().close
    if (self.date == self.position_set.stop_date
            and self.position_set.status == 'CLOSE'):
        last_close = self.close_order.net_price

    stage = self.position_set.get_stage(last_close)

    # Look the previous close up once instead of re-running the reversed
    # lookup for every use.
    prev_close = self.stocks.reverse()[1].close
    status = self.position_set.current_status(
        new_price=last_close,
        old_price=prev_close
    )

    enter_price = self.open_order.net_price
    quantity = self.open_order.quantity
    holding = enter_price * quantity  # cost basis, used for the percentages

    pl_open = (last_close - enter_price) * quantity
    pl_open_pct = round(pl_open / holding * 100, 2)

    if self.date == self.start_date:
        # first day: measured from the entry price
        pl_day = (last_close - enter_price) * quantity
    elif self.date == self.stop_date:
        # last day: measured up to the exit price
        pl_day = (self.close_order.net_price - prev_close) * quantity
    else:
        pl_day = (last_close - prev_close) * quantity
    pl_day_pct = round(pl_day / holding * 100, 2)

    return dict(
        stage_id=stage.id,
        stage=stage.stage_name,
        status=status,
        pl_open=round(pl_open, 2),
        pl_open_pct=pl_open_pct,
        pl_day=round(pl_day, 2),
        pl_day_pct=pl_day_pct,
        enter_price=enter_price,
        exit_price=self.close_order.net_price if self.close_order else 0.0,
        quantity=quantity,
        holding=holding,
        bp_effect=self.position_instruments.last().bp_effect,
        # Timestamp.to_datetime() no longer exists in pandas;
        # to_pydatetime() is the supported conversion.
        date=(self.date + Hour(17) + Minute(30)).to_pydatetime().date(),
    )
def dataPreparation(rawdata):
    """Slice ``rawdata`` into sliding training/label windows and split them.

    A window of ``MOVING_WINDOW_SIZE`` hourly stamps starting at
    ``TRAINING_DURATION_START`` is moved forward by ``MOVING_WINDOW_STEP``
    hours once per day of the experiment span. In every position the first
    ``TRAINING_WINDOW_HOURS`` values become a data row and the remaining
    values a label row. The first
    ``(TRAINING_DURATION_END - TRAINING_DURATION_START).days`` rows form the
    training set, the rest the test set.

    :param rawdata: frame indexed by hourly timestamps.
    :return: ``(train_data, train_label, test_data, test_label)`` as float
        arrays.
    """
    # Cut data until 2016-04-22
    rawdata = rawdata[:DATA_FOR_EXPERIMENT]

    idx = pd.date_range(start=TRAINING_DURATION_START,
                        periods=MOVING_WINDOW_SIZE,
                        freq='H')
    data_duration = (
        TEST_DURATION_END -
        datetime.timedelta(TRAINING_WINDOW_DAYS)) - TRAINING_DURATION_START

    # Collect one single-row frame per window and concatenate once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    data_rows = []
    label_rows = []
    for _ in range(data_duration.days):
        expdata = rawdata.loc[idx]
        data_row = (expdata[:TRAINING_WINDOW_HOURS].values).transpose()
        label_row = (expdata[TRAINING_WINDOW_HOURS:].values).transpose()
        data_rows.append(
            pd.DataFrame(data=data_row, columns=range(TRAINING_WINDOW_HOURS)))
        label_rows.append(
            pd.DataFrame(data=label_row,
                         columns=range(PREDICTION_WINDOW_HOURS)))
        idx = idx + Hour(MOVING_WINDOW_STEP)

    # pd.concat rejects an empty list, so keep the empty-frame result for a
    # zero-day span.
    if data_rows:
        data = pd.concat(data_rows, ignore_index=True)
        label = pd.concat(label_rows, ignore_index=True)
    else:
        data = pd.DataFrame(columns=range(TRAINING_WINDOW_HOURS))
        label = pd.DataFrame(columns=range(PREDICTION_WINDOW_HOURS))

    # Split data into train and test data
    TRAINING_PERIODS = TRAINING_DURATION_END - TRAINING_DURATION_START
    train_data = data[:TRAINING_PERIODS.days].values.astype('float')
    train_label = label[:TRAINING_PERIODS.days].values.astype('float')
    test_data = data[TRAINING_PERIODS.days:].values.astype('float')
    test_label = label[TRAINING_PERIODS.days:].values.astype('float')
    return train_data, train_label, test_data, test_label
#---------------------------------------------------------------------- # Offset names ("time rules") and related functions from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli, Week, Micro, MonthEnd, MonthBegin, BMonthBegin, BMonthEnd, YearBegin, YearEnd, BYearBegin, BYearEnd, QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd) _offset_map = { 'D' : Day(), 'B' : BDay(), 'H' : Hour(), 'T' : Minute(), 'S' : Second(), 'L' : Milli(), 'U' : Micro(), None : None, # Monthly - Calendar 'M' : MonthEnd(), 'MS' : MonthBegin(), # Monthly - Business 'BM' : BMonthEnd(), 'BMS' : BMonthBegin(), # Annual - Calendar
'W': 'W-SUN', 'Q': 'Q-DEC', 'A': 'A-DEC', # YearEnd(month=12), 'AS': 'AS-JAN', # YearBegin(month=1), 'BA': 'BA-DEC', # BYearEnd(month=12), 'BAS': 'BAS-JAN', # BYearBegin(month=1), 'Min': 'T', 'min': 'T', 'ms': 'L', 'us': 'U', 'ns': 'N' } _name_to_offset_map = { 'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), 'seconds': Second(1), 'milliseconds': Milli(1), 'microseconds': Micro(1), 'nanoseconds': Nano(1) } _INVALID_FREQ_ERROR = "Invalid frequency: {0}" def to_offset(freqstr): """ Return DateOffset object from string representation or Timedelta object
def create_data():
    """Build the nested dict of pandas objects used as pickle/msgpack data.

    The result maps the category names ``series``, ``frame``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp``
    and ``offsets`` to dicts of named sample objects. A few entries are only
    added, or are built with a different keyword, depending on
    ``_loose_version``.
    """
    data = dict(
        A=[0., 1., 2., 3., np.nan],
        B=[0, 1, 0, 1, 0],
        C=['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        D=date_range('1/1/2009', periods=5),
        E=[0., 1, Timestamp('20100101'), 'foo', 2.],
    )

    scalars = {
        'timestamp': Timestamp('20130101'),
        'period': Period('2012', 'M'),
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
        'float': Index(np.arange(10, dtype=np.float64)),
        'uint': Index(np.arange(10, dtype=np.uint64)),
        'timedelta': timedelta_range('00:00:00', freq='30T', periods=10),
    }
    # Index flavours that only exist from a given pandas version on.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)
    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    reg2_tuples = tuple(zip(
        ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
        ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']))
    mi = {
        'reg2': MultiIndex.from_tuples(reg2_tuples, names=['first', 'second']),
    }

    series_mi_index = MultiIndex.from_tuples(
        tuple(zip([1, 1, 2, 2, 2], [3, 4, 3, 4, 5])),
        names=['one', 'two'])
    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': Series(np.arange(10).astype(np.int64),
                     index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64), index=series_mi_index),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=['A', 'B', 'C', 'D', 'A']),
        'cat': Series(Categorical(['foo', 'bar', 'baz'])),
        'dt': Series(date_range('20130101', periods=5)),
        'dt_tz': Series(date_range('20130101', periods=5, tz='US/Eastern')),
        'period': Series([Period('2000Q1')] * 5),
    }

    # Same data as `data`, but with a duplicated column label.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame_mi_index = MultiIndex.from_tuples(
        tuple(zip(['bar', 'bar', 'baz', 'baz', 'baz'],
                  ['one', 'two', 'one', 'two', 'three'])),
        names=['first', 'second'])
    frame = {
        'float': DataFrame({
            'A': series['float'],
            'B': series['float'] + 1,
        }),
        'int': DataFrame({
            'A': series['int'],
            'B': series['int'] + 1,
        }),
        'mixed': DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
        'mi': DataFrame(
            {
                'A': np.arange(5).astype(np.float64),
                'B': np.arange(5).astype(np.int64),
            },
            index=frame_mi_index),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=['A', 'B', 'A']),
        'cat_onecol': DataFrame({'A': Categorical(['foo', 'bar'])}),
        'cat_and_float': DataFrame({
            'A': Categorical(['foo', 'bar', 'baz']),
            'B': np.arange(3).astype(np.int64),
        }),
        'mixed_dup': mixed_dup_df,
        'dt_mixed_tzs': DataFrame(
            {
                'A': Timestamp('20130102', tz='US/Eastern'),
                'B': Timestamp('20130603', tz='CET'),
            },
            index=range(5)),
        'dt_mixed2_tzs': DataFrame(
            {
                'A': Timestamp('20130102', tz='US/Eastern'),
                'B': Timestamp('20130603', tz='CET'),
                'C': Timestamp('20130603', tz='UTC'),
            },
            index=range(5)),
    }

    cat = {
        'int8': Categorical(list('abcdefg')),
        'int16': Categorical(np.arange(1000)),
        'int32': Categorical(np.arange(10000)),
    }

    timestamp = {
        'normal': Timestamp('2011-01-01'),
        'nat': NaT,
        'tz': Timestamp('2011-01-01', tz='US/Eastern'),
    }
    # Versions before 0.19.2 take the frequency as `offset`, later ones as
    # `freq`.
    if _loose_version < LooseVersion('0.19.2'):
        freq_keyword = 'offset'
    else:
        freq_keyword = 'freq'
    timestamp['freq'] = Timestamp('2011-01-01', **{freq_keyword: 'D'})
    timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                  **{freq_keyword: 'M'})

    off = dict(
        DateOffset=DateOffset(years=1),
        DateOffset_h_ns=DateOffset(hour=6, nanoseconds=5824),
        BusinessDay=BusinessDay(offset=timedelta(seconds=9)),
        BusinessHour=BusinessHour(normalize=True, n=6, end='15:14'),
        CustomBusinessDay=CustomBusinessDay(weekmask='Mon Fri'),
        SemiMonthBegin=SemiMonthBegin(day_of_month=9),
        SemiMonthEnd=SemiMonthEnd(day_of_month=24),
        MonthBegin=MonthBegin(1),
        MonthEnd=MonthEnd(1),
        QuarterBegin=QuarterBegin(1),
        QuarterEnd=QuarterEnd(1),
        Day=Day(1),
        YearBegin=YearBegin(1),
        YearEnd=YearEnd(1),
        Week=Week(1),
        Week_Tues=Week(2, normalize=False, weekday=1),
        WeekOfMonth=WeekOfMonth(week=3, weekday=4),
        LastWeekOfMonth=LastWeekOfMonth(n=1, weekday=3),
        FY5253=FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        Easter=Easter(),
        Hour=Hour(1),
        Minute=Minute(1),
    )

    return {
        'series': series,
        'frame': frame,
        'index': index,
        'scalars': scalars,
        'mi': mi,
        'sp_series': {
            'float': _create_sp_series(),
            'ts': _create_sp_tsseries(),
        },
        'sp_frame': {'float': _create_sp_frame()},
        'cat': cat,
        'timestamp': timestamp,
        'offsets': off,
    }
class TestTimedeltaIndexOps:
    """Behavioural tests for TimedeltaIndex operations.

    Covers value_counts/unique, membership with duplicates, sorting,
    duplicate handling, frequency inference, repeat, NaT bookkeeping and
    assignment of ``freq`` on the backing array.
    """

    def test_value_counts_unique(self):
        """value_counts and unique on repeated values, with and without NaT."""
        # GH 7735
        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        obj = idx
        tm.assert_series_equal(obj.value_counts(), expected)

        obj = Series(idx)
        tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                pd.NaT,
            ]
        )

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """The first element is found in an index that has duplicates."""
        # GH 9512
        for idx in map(
            TimedeltaIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["00:01:00", "00:01:00", "00:02:00"],
                ["00:01:00", "00:01:00", "00:00:01"],
            ),
        ):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Accessing a missing attribute on a Series raises AttributeError."""
        # see gh-9680
        tdi = timedelta_range(start=0, periods=10, freq="1s")
        ts = Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__.keys()
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        """sort_values keeps or reverses freq when sorted, drops it otherwise."""
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )

        idx2 = TimedeltaIndex(
            ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
        )
        exp2 = TimedeltaIndex(
            ["1 day", "1 day", "2 day", "3 day", "5 day"], name="idx2"
        )

        # Previously this looped over (idx1, exp1) three times and never used
        # idx2. Both inputs share the same ordering pattern, so the expected
        # indexers below apply to each of them.
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 0, 4])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """drop_duplicates keeps freq on unique input and resets it otherwise."""
        # GH 10115
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(expected, result)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """duplicated/drop_duplicates agree between Index and Series."""
        # to check Index/Series compat
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        """freq='infer' recovers the frequency from the integer values."""
        # GH#11018
        idx = timedelta_range("1", freq=freq_sample, periods=10)
        result = TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_repeat(self):
        """repeat duplicates each element and drops freq."""
        index = timedelta_range("1 days", periods=2, freq="D")
        exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex(
            [
                "1 days",
                "1 days",
                "1 days",
                "NaT",
                "NaT",
                "NaT",
                "3 days",
                "3 days",
                "3 days",
            ]
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NaT is the NA value and is reported by the NaN helpers."""
        assert TimedeltaIndex._na_value is pd.NaT
        assert TimedeltaIndex([])._na_value is pd.NaT

        idx = TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """A compatible freq can be set (string or offset) and cleared."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Incompatible, non-fixed and unparsable freqs raise ValueError."""
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        """Clearing freq on one index leaves an index sharing its data alone."""
        # Setting the freq for one TimedeltaIndex shouldn't alter the freq
        #  for another that views the same data

        tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        tda = tdi._data

        tdi2 = TimedeltaIndex(tda)._with_freq(None)
        assert tdi2.freq is None

        # Original was not altered
        assert tdi.freq == "2D"
        assert tda.freq == "2D"
class TestDatetimeIndexOps:
    """Tests for DatetimeIndex operations: repeat, sorting, de-duplication,
    NaT handling, equality and the ``freq`` setter."""

    def test_ops_properties_basic(self, datetime_series):
        """Datetime field names are not attributes of a Series, while
        label-based attribute access keeps working."""
        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series({"year": 2000, "month": 1, "day": 10})
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` multiplies the length and always drops ``freq``."""
        tz = tz_naive_fixture
        rng = pd.date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        # (index to repeat, repetitions, expected labels)
        cases = [
            (
                pd.date_range("2001-01-01", periods=2, freq="D", tz=tz),
                2,
                ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"],
            ),
            (
                pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz),
                2,
                ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"],
            ),
            (
                DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz),
                3,
                [
                    "2001-01-01",
                    "2001-01-01",
                    "2001-01-01",
                    "NaT",
                    "NaT",
                    "NaT",
                    "2003-01-01",
                    "2003-01-01",
                    "2003-01-01",
                ],
            ),
        ]
        for index, reps, labels in cases:
            exp = DatetimeIndex(labels, tz=tz)
            # both the method and the numpy function must agree
            for res in [index.repeat(reps), np.repeat(index, reps)]:
                tm.assert_index_equal(res, exp)
                assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        """``repeat`` / ``np.repeat`` duplicate elements; ``axis`` is rejected."""
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            ]
        )

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    # One test per frequency so that a failure for one alias does not hide
    # the results for the others.
    @pytest.mark.parametrize(
        "freq, expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, tz_naive_fixture, freq, expected):
        """``resolution`` reports the coarsest unit needed for the index."""
        tz = tz_naive_fixture
        idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` and ``unique`` agree between Index and Series."""
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz
        )
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """Sorting a regular index keeps (or negates) its ``freq``."""
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    # The date-only case used to be listed twice with identical values; one
    # copy is enough.
    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        """Sorting an irregular index yields ``freq is None`` in every mode."""
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps ``freq`` only when nothing was dropped."""
        # GH 10115
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated`` / ``drop_duplicates`` honour every ``keep`` mode."""
        # to check Index/Series compat
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        """``freq="infer"`` recovers the frequency from raw i8 values."""
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        """NaT is the NA value and is reflected by the NaN helpers."""
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        """``equals`` is sensitive to type and timezone and never raises."""
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """A conforming ``freq`` can be set (string or offset) and reset."""
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Non-conforming or unparsable frequencies raise ``ValueError``."""
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        """Dropping ``freq`` on a derived index leaves the original intact."""
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data

        dti = pd.date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
class TestDatetimeIndexOps:
    """DatetimeIndex checks: resolution, value counts, frequency inference
    and the ``freq`` setter."""

    def test_ops_properties_basic(self, datetime_series):
        """Datetime field names are not Series attributes; labels still are."""
        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        labelled = Series({"year": 2000, "month": 1, "day": 10})
        assert labelled.year == 2000
        assert labelled.month == 1
        assert labelled.day == 10

        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            labelled.weekday

    @pytest.mark.parametrize(
        "freq,expected",
        [
            ("A", "day"),
            ("Q", "day"),
            ("M", "day"),
            ("D", "day"),
            ("H", "hour"),
            ("T", "minute"),
            ("S", "second"),
            ("L", "millisecond"),
            ("U", "microsecond"),
        ],
    )
    def test_resolution(self, request, tz_naive_fixture, freq, expected):
        """``resolution`` matches the expected unit for each frequency."""
        tz = tz_naive_fixture

        # Annual ranges on 32-bit builds with a local tz are marked xfail.
        overflow_expected = freq == "A" and not IS64 and isinstance(tz, tzlocal)
        if overflow_expected:
            marker = pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
            request.node.add_marker(marker)

        dti = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert dti.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` / ``unique`` agree between Index and Series."""
        tz = tz_naive_fixture
        # GH 7735
        hourly = date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        repeated = DatetimeIndex(
            np.repeat(hourly.values, range(1, len(hourly) + 1)), tz=tz
        )

        descending = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected_counts = Series(range(10, 0, -1), index=descending, dtype="int64")
        expected_counts.index = expected_counts.index._with_freq(None)

        for obj in [repeated, Series(repeated)]:
            tm.assert_series_equal(obj.value_counts(), expected_counts)

        expected_unique = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected_unique = expected_unique._with_freq(None)
        tm.assert_index_equal(repeated.unique(), expected_unique)

        with_nat = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        labels = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        counts_dropna = Series([3, 2], index=labels)
        for obj in [with_nat, Series(with_nat)]:
            tm.assert_series_equal(obj.value_counts(), counts_dropna)

        labels_with_nat = DatetimeIndex(
            ["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz
        )
        counts_keepna = Series([3, 2, 1], index=labels_with_nat)
        for obj in [with_nat, Series(with_nat)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), counts_keepna)

        tm.assert_index_equal(with_nat.unique(), labels_with_nat)

    def test_infer_freq(self, freq_sample):
        """``freq="infer"`` recovers the frequency from raw i8 values."""
        # GH 11018
        original = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        rebuilt = DatetimeIndex(original.asi8, freq="infer")
        tm.assert_index_equal(original, rebuilt)
        assert rebuilt.freq == freq_sample

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """A conforming ``freq`` can be assigned and later cleared."""
        # GH 20678
        dti = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        dti._data.freq = freq
        assert dti.freq == freq
        assert isinstance(dti.freq, DateOffset)

        # can reset to None
        dti._data.freq = None
        assert dti.freq is None

    def test_freq_setter_errors(self):
        """Non-conforming or unparsable frequencies raise ``ValueError``."""
        # GH 20678
        dti = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            dti._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            dti._data.freq = "foo"

    def test_freq_view_safe(self):
        """Dropping ``freq`` on a derived index leaves the original intact."""
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data
        source = date_range("2016-01-01", periods=5)
        backing = source._data

        derived = DatetimeIndex(backing)._with_freq(None)
        assert derived.freq is None

        # Original was not altered
        assert source.freq == "D"
        assert backing.freq == "D"
def test_Hour():
    """Exercise the Hour offset: offset/datetime pairs and Hour arithmetic."""
    midnight = datetime(2010, 1, 1)
    one_am = datetime(2010, 1, 1, 1)
    two_am = datetime(2010, 1, 1, 2)

    # (offset, base, expected) triples handed to assert_offset_equal
    cases = (
        (Hour(), midnight, one_am),
        (Hour(-1), one_am, midnight),
        (2 * Hour(), midnight, two_am),
        (-1 * Hour(), one_am, midnight),
    )
    for offset, base, expected in cases:
        assert_offset_equal(offset, base, expected)

    # offsets of the same kind add, subtract and compare by their multiple
    assert Hour(3) + Hour(2) == Hour(5)
    assert Hour(3) - Hour(2) == Hour()
    assert Hour(4) != Hour(1)
def test_Day_equals_24_Hours():
    """Adding ``Day(1)`` to a tz-aware Timestamp equals adding ``Hour(24)``."""
    start = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')

    via_day = start + Day(1)
    via_hours = start + Hour(24)

    assert via_day == via_hours
# Build a daily index from a start date, or ending at an end date, with a
# fixed number of periods.
index = pd.date_range(start='20120401', periods=25)
index = pd.date_range(end='20120601', periods=20)

# The start and end dates define strict boundaries for the generated index.
# To get an index made of the last business day of each month, pass the 'BM'
# (business month end) frequency.
# NOTE(review): pandas 2.2+ renamed several aliases ('BM' -> 'BME',
# 'H' -> 'h'); the spellings used below emit a deprecation warning there.
index = pd.date_range('1/1/2000', '12/1/2000', freq='BM')

# --- Frequencies and date offsets ---
# A pandas frequency is made of a base frequency and a multiplier. The base
# frequency is usually written as a string alias such as 'M' or 'H'; every
# base frequency has a matching date offset object.
from pandas.tseries.offsets import Hour, Minute

hour = Hour()
four_hours = Hour(4)

# There is usually no need to create these objects explicitly: a string alias
# such as 'H' or '4H' is enough.
index = pd.date_range('20000105', '20000106', freq='4H')

# Most offset objects can be combined by addition.
interval = Hour(2) + Minute(25)

# In the same way a compound frequency string (here '4h15min') can be passed;
# it is parsed into the equivalent sum of offsets.
index = pd.date_range('20000105', '20000106', freq='4h15min')

# WOM ("week of month") is a handy frequency class whose aliases start with
# WOM; it produces dates such as "the third Monday of every month".
rng = pd.date_range('20000105', '20010106', freq='WOM-3MON')

# --- Shifting (leading and lagging) data ---
# Shifting moves data forward or backward along the time axis. Series and
# DataFrame both have a `shift` method that performs a plain forward or
# backward shift while leaving the index unchanged.
def time_for_next_update(last_time, freq='D', num=9, is_end=False):
    """Return the time of the next update that follows a previous update.

    Arguments:
        last_time {Timestamp} -- time of the previous update; a null value
            yields ``pd.Timestamp(MARKET_START)``

    Keyword Arguments:
        freq {str} -- update period, case-insensitive; one of
            'MIN', 'H', 'D', 'B', 'W', 'M', 'Q', 'Y' (default: {'D'})
        num {int} -- number of periods for 'MIN'/'H', hour of day for
            'D'/'B', ignored for longer periods (default: {9})
        is_end {bool} -- target the end of the period rather than its
            start; only used for 'W', 'M', 'Q', 'Y' (default: {False})

    Raises:
        AssertionError: `last_time` is not a Timestamp or lies in the future
        ValueError: `freq` is not a recognised period type

    Returns:
        Timestamp -- time of the next update

    Notes:
        1. freq below one day ('MIN', 'H')
            `last_time` is floored to the unit and moved forward by `num`
            units; period start/end is not considered.
        2. freq 'D' or 'B'
            `num` is an hour of the day. If `last_time` is earlier than
            hour `num` of its own day (a business day for 'B'), that time is
            returned; otherwise hour `num` of the next (business) day.
        3. freq above one day ('W', 'M', 'Q', 'Y')
            Only the start or end of the period matters (week, month,
            quarter or year start/end); `num` has no effect.
    """
    valid_freq = ('B', 'D', 'W', 'M', 'Q', 'Y', 'H', 'MIN')
    if pd.isnull(last_time):
        return pd.Timestamp(MARKET_START)
    # Validation stays as assertions so callers keep seeing AssertionError.
    assert isinstance(
        last_time, pd.Timestamp), f'类型错误,希望Timestamp,实际为{type(last_time)}'
    now = pd.Timestamp.now(tz=last_time.tz)
    assert last_time <= now, '过去时间必须小于当前时间'
    freq = freq.upper()

    # Offsets are applied with `+`: DateOffset.apply was removed in pandas 2.0.
    # Lower-case aliases are used for flooring because the upper-case spellings
    # are deprecated in newer pandas.
    if freq == 'MIN':
        return last_time.floor('min') + Minute(n=num)
    if freq == 'H':
        return last_time.floor('h') + Hour(n=num)
    if freq == 'D':
        # the hour of day matters here
        limit = last_time.floor(freq).replace(hour=num)
        if last_time < limit:
            return limit
        return (last_time.floor(freq) + Day()).replace(hour=num)
    if freq == 'B':
        day_start = last_time.normalize()
        # On a business day the same-day slot may still be ahead of us.
        if last_time.weekday() in range(0, 5):
            limit = day_start.replace(hour=num)
            if last_time < limit:
                return limit
        return (day_start + BDay()).replace(hour=num)
    if freq == 'W':
        nw = last_time.normalize() + pd.Timedelta(weeks=1)
        if is_end:
            # last nanosecond of the Sunday that closes next week
            return nw + pd.Timedelta(days=7 - nw.weekday()) - pd.Timedelta(nanoseconds=1)
        # Monday of next week
        return nw - pd.Timedelta(days=nw.weekday())
    if freq == 'M':
        if is_end:
            res = last_time.normalize() + MonthEnd(n=2)
            if last_time.is_month_end:
                # Starting on a month end, two steps overshoot by one month.
                # (`rollback` was a no-op here because `res` is already a
                # month end.)
                res = res + MonthEnd(n=-1)
            return res
        return last_time.normalize() + MonthBegin()
    if freq == 'Q':
        if is_end:
            res = last_time + QuarterEnd(n=2, startingMonth=3, normalize=True)
            if last_time.is_quarter_end:
                res = res + QuarterEnd(n=-1, startingMonth=3, normalize=True)
            return res
        return last_time + QuarterBegin(n=1, normalize=True, startingMonth=1)
    if freq == 'Y':
        if last_time.year == now.year:
            # Within the current year the boundary of this year is returned.
            if is_end:
                return last_time.normalize().replace(year=now.year, month=12, day=31)
            return last_time.normalize().replace(year=now.year, month=1, day=1)
        if is_end:
            res = last_time + YearEnd(normalize=True, month=12, n=2)
            if last_time.is_year_end:
                res = res + YearEnd(n=-1, month=12, normalize=True)
            return res
        return last_time + YearBegin(normalize=True, month=1, n=1)
    raise ValueError('不能识别的周期类型,仅接受{}。实际输入为{}'.format(
        valid_freq, freq))
def create_data():
    """Build the nested dict of pandas objects used as pickle/msgpack data.

    The result maps a category name ("series", "frame", "index", "scalars",
    "mi", "sp_series", "sp_frame", "cat", "timestamp", "offsets") to a dict
    of named sample objects.
    """
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M"),
    }

    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }
    index["range"] = RangeIndex(10)

    # interval_range is only imported when the version check passes
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    reg2_first = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
    reg2_second = ["one", "two", "one", "two", "one", "two", "one", "two"]
    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(zip(reg2_first, reg2_second)),
            names=["first", "second"],
        )
    }

    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(
            np.arange(10).astype(np.int64),
            index=date_range("20130101", periods=10),
        ),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip([1, 1, 2, 2, 2], [3, 4, 3, 4, 5])),
                names=["one", "two"],
            ),
        ),
        "dup": Series(
            np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]
        ),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }

    # frame whose column labels contain a duplicate ("A" twice)
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame_mi_first = ["bar", "bar", "baz", "baz", "baz"]
    frame_mi_second = ["one", "two", "one", "two", "three"]
    frame = {
        "float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
        "int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64),
            },
            index=MultiIndex.from_tuples(
                tuple(zip(frame_mi_first, frame_mi_second)),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64),
            columns=["A", "B", "A"],
        ),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame(
            {
                "A": Categorical(["foo", "bar", "baz"]),
                "B": np.arange(3).astype(np.int64),
            }
        ),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries(),
        },
        "sp_frame": {"float": _create_sp_frame()},
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }