class TimestampOps:
    """Timing cases for scalar ``Timestamp`` operations, parametrised by ``tz``.

    ``setup`` builds one ``Timestamp`` per ``tz`` parameter and every
    ``time_*`` method performs a single operation on it, discarding the
    result.  The harness that drives these methods is not visible here.
    """

    # One naive case plus string, pytz and dateutil flavours of a timezone.
    params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
    param_names = ["tz"]

    def setup(self, tz):
        """Create the timestamp under test, localized to ``tz`` (or naive)."""
        self.ts = Timestamp("2017-08-25 08:16:14", tz=tz)

    def time_replace_tz(self, tz):
        self.ts.replace(tzinfo=pytz.timezone("US/Eastern"))

    def time_replace_None(self, tz):
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        self.ts.normalize()

    def time_tz_convert(self, tz):
        # Only attempted on tz-aware timestamps; a no-op for the naive case.
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        # Only attempted on naive timestamps; a no-op for tz-aware cases.
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        self.ts.to_julian_date()

    def time_floor(self, tz):
        # "5min" rather than the deprecated "5T" alias, which emits a
        # FutureWarning on pandas 2.2 and is rejected by later releases.
        self.ts.floor("5min")

    def time_ceil(self, tz):
        # Same alias fix as in time_floor.
        self.ts.ceil("5min")
class TimestampOps(object):
    """Timing cases for scalar ``Timestamp`` operations, one run per ``tz``.

    Each ``time_*`` method executes exactly one operation on the timestamp
    created in ``setup`` and ignores the result.  The caller of these
    methods is outside the visible source.
    """

    # Naive, string, pytz and dateutil timezone inputs.
    params = [None, 'US/Eastern', pytz.UTC, dateutil.tz.tzutc()]
    param_names = ['tz']

    def setup(self, tz):
        """Build the timestamp the timing methods operate on."""
        self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

    def time_replace_tz(self, tz):
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_None(self, tz):
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        self.ts.normalize()

    def time_tz_convert(self, tz):
        # Skipped for the naive timestamp (self.ts.tz is None).
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        # Skipped for timestamps that already carry a timezone.
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        self.ts.to_julian_date()

    def time_floor(self, tz):
        # '5min' replaces the deprecated 'T' minute alias ('5T'); the
        # spelled-out alias is accepted by old and new pandas alike.
        self.ts.floor('5min')

    def time_ceil(self, tz):
        # See time_floor for why '5min' is used instead of '5T'.
        self.ts.ceil('5min')
def __call__(
        self, timestamp: pd.Timestamp, ticker: str, price: float
) -> Union[Tuple[pd.Timestamp, np.ndarray, np.ndarray, np.ndarray,
                 np.ndarray], None]:
    """
    Feed one tick into the current bar and emit the bar once it has elapsed

    :param pd.Timestamp timestamp: timestamp of current tick
    :param str ticker: ticker of the tick
    :param float price: mid of the tick
    :return: None while the bar is still open, otherwise the new bar
        timestamp followed by the opens, highs, lows and closes arrays
        (one slot per entry of ``self.tickers``)
    """
    # The tick is always recorded first, even when it is the one that
    # closes the bar.
    self.data[ticker].update(price)

    def bar_anchor():
        # floor to the configured unit so the bar can end earlier; an empty
        # unit means "use the tick timestamp as is"
        return timestamp if self.unit == '' else timestamp.floor(self.unit)

    if self.timestamp is None:
        # very first tick: just open the bar
        self.timestamp = bar_anchor()
        return None

    if not timestamp >= self.timestamp + self.delta:
        # bar still running
        return None

    opens, highs, lows, closes = (np.zeros(self.N) for _ in range(4))
    for pos, name in enumerate(self.tickers):
        bar_open, bar_high, bar_low, bar_close = self.data[name].clear()
        opens[pos] = bar_open
        highs[pos] = bar_high
        lows[pos] = bar_low
        closes[pos] = bar_close

    self.timestamp = bar_anchor()
    return self.timestamp, opens, highs, lows, closes
def _list_historical_pricing(self, now: pd.Timestamp, symbol: str, limit: int = 1500):
    """Fetch 1-minute OHLCV candles for ``symbol`` as a UTC-indexed DataFrame.

    ``limit + 1`` candles are requested from ``self.binance_cli.fetch_ohlcv``;
    requests of 1000 or more are split into a 1000-candle page plus a second,
    older page.  Rows are sorted by time and every candle at or after the
    start of the minute containing ``now`` is dropped.

    :param now: reference time; must be tz-aware so it compares against the
        UTC index.
    :param symbol: market symbol passed straight to the client.
    :param limit: number of candles wanted; must be below 2000.
    :return: DataFrame with columns open/high/low/close/volume indexed by a
        UTC ``date`` index.
    :raises AssertionError: if ``limit`` is 2000 or more.
    """
    # Raised explicitly so the guard is not stripped under ``python -O``;
    # the exception type is unchanged for existing callers.
    if not limit < 2000:
        raise AssertionError(f"limit must be < 2000, got {limit}")

    fetch = self.binance_cli.fetch_ohlcv
    if limit >= 1000:
        pricing = fetch(symbol=symbol, timeframe="1m", limit=1000)
        ext_limit = (limit + 1) - 1000
        if pricing:
            # Second page: the ``ext_limit`` minutes immediately before the
            # first candle of the first page (timestamps are milliseconds).
            older = fetch(
                symbol=symbol,
                timeframe="1m",
                limit=ext_limit,
                since=pricing[0][0] - (60 * ext_limit * 1000),
            )
            # Concatenate instead of ``+=`` so the client's list is not
            # mutated in place.
            pricing = pricing + older
    else:
        pricing = fetch(symbol=symbol, timeframe="1m", limit=limit + 1)

    pricing = pd.DataFrame(
        pricing,
        columns=["date", "open", "high", "low", "close", "volume"],
    ).set_index("date")
    # Vectorised epoch-milliseconds -> UTC conversion; replaces a per-element
    # ``datetime.utcfromtimestamp`` (deprecated since Python 3.12).
    pricing.index = pd.to_datetime(pricing.index, unit="ms", utc=True)

    # We drop one value always: the candle of the current (unfinished) minute.
    pricing = pricing.sort_index()
    return pricing[pricing.index < now.floor("min")]
def test_round_minute_freq(self, test_input, freq, expected): # ensure timestamps that shouldn't round don't # GH#21262 dt = Timestamp(test_input) expected = Timestamp(expected) result_ceil = dt.ceil(freq) assert result_ceil == expected result_floor = dt.floor(freq) assert result_floor == expected result_round = dt.round(freq) assert result_round == expected
class TimestampOps:
    """Per-timezone timing cases for individual ``Timestamp`` methods.

    ``setup`` receives the current ``tz`` parameter and stores the timestamp
    on ``self.ts``; each ``time_*`` method then calls one ``Timestamp``
    method and throws the result away.  What invokes them is not shown in
    this source.
    """

    # tz inputs covered: naive, tz name, pytz object, dateutil object.
    params = [None, 'US/Eastern', pytz.UTC, dateutil.tz.tzutc()]
    param_names = ['tz']

    def setup(self, tz):
        """Store the timestamp under test on ``self.ts``."""
        self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

    def time_replace_tz(self, tz):
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_None(self, tz):
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        self.ts.normalize()

    def time_tz_convert(self, tz):
        # Conversion is only exercised when the timestamp is tz-aware.
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        # Localization is only exercised when the timestamp is naive.
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        self.ts.to_julian_date()

    def time_floor(self, tz):
        # The 'T' alias ('5T') is deprecated in recent pandas and would add
        # warning overhead (or fail outright); '5min' is equivalent.
        self.ts.floor('5min')

    def time_ceil(self, tz):
        # '5min' instead of the deprecated '5T', as in time_floor.
        self.ts.ceil('5min')
def test_round_int64(self, timestamp, freq): # check that all rounding modes are accurate to int64 precision # see GH#22591 dt = Timestamp(timestamp) unit = to_offset(freq).nanos # test floor result = dt.floor(freq) assert result.value % unit == 0, f"floor not a {freq} multiple" assert 0 <= dt.value - result.value < unit, "floor error" # test ceil result = dt.ceil(freq) assert result.value % unit == 0, f"ceil not a {freq} multiple" assert 0 <= result.value - dt.value < unit, "ceil error" # test round result = dt.round(freq) assert result.value % unit == 0, f"round not a {freq} multiple" assert abs(result.value - dt.value) <= unit // 2, "round error" if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2: # round half to even assert result.value // unit % 2 == 0, "round half to even error"
def test_round_int64(self, timestamp, freq): """check that all rounding modes are accurate to int64 precision see GH#22591 """ dt = Timestamp(timestamp) unit = to_offset(freq).nanos # test floor result = dt.floor(freq) assert result.value % unit == 0, "floor not a {} multiple".format(freq) assert 0 <= dt.value - result.value < unit, "floor error" # test ceil result = dt.ceil(freq) assert result.value % unit == 0, "ceil not a {} multiple".format(freq) assert 0 <= result.value - dt.value < unit, "ceil error" # test round result = dt.round(freq) assert result.value % unit == 0, "round not a {} multiple".format(freq) assert abs(result.value - dt.value) <= unit // 2, "round error" if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2: # round half to even assert result.value // unit % 2 == 0, "round half to even error"
def test_floor(self): dt = Timestamp("20130101 09:10:11") result = dt.floor("D") expected = Timestamp("20130101") assert result == expected
def test_floor(self): dt = Timestamp('20130101 09:10:11') result = dt.floor('D') expected = Timestamp('20130101') assert result == expected
def test_floor(self): dt = Timestamp('20130101 09:10:11') result = dt.floor('D') expected = Timestamp('20130101') assert result == expected
def _get_minutes_to_sync(self, now: pd.Timestamp) -> int:
    """Return how many minutes separate the last sync from ``now``, minus one.

    ``now`` is floored to the minute, the value of
    ``self.usecase.get_last_sync_on()`` is subtracted, and the difference is
    converted to whole minutes.

    :param now: current time; must be comparable with the last-sync value
        (both naive or both tz-aware).
    :return: whole minutes elapsed minus one; ``-1`` when the last sync falls
        in the current minute.
    """
    last_sync_on = self.usecase.get_last_sync_on()
    # "min" replaces the deprecated "T" minute alias.
    elapsed = now.floor("min") - last_sync_on
    minutes_delta = int(elapsed.total_seconds() // 60)
    # NOTE(review): the -1 presumably excludes the still-open latest minute
    # -- confirm against the caller.
    return minutes_delta - 1
def nyiso_cbl(meter, event_start, event_end, look_back, event_type = 'weekday'):
    '''
    Calculate the NYISO customer baseline (CBL) for a demand-response event.

    Parameters:
        meter (dataframe): A dataframe of interval load data. The columns
            read here are ``date``, ``hour``, ``kW``, ``dttm`` and ``id``.
        event_start (str) : A str coercible to a timestamp for the start of the event
        event_end (str) : A str coercible to a timestamp for the end of the event
        look_back (int) : An integer specifying the number of days to look back
        event_type (str) : A string specifying the type of event
            ('weekday', 'saturday' or 'sunday')

    Returns:
        tuple : ``(event_day, perf)`` - the event-day dataframe with the
        ``baseline`` and ``adjustment`` columns added, and whatever
        ``perf_calc`` returns for the event hours

    Raises:
        ValueError : if ``event_type`` is not one of the supported values
    '''
    # ISO weekday numbers (Monday=1 ... Sunday=7) that are comparable to the
    # event day. Validated first so that a bad event_type fails immediately
    # instead of surfacing later as an unbound local variable.
    like_days = {
        'weekday': list(range(1, 6)),
        'saturday': [6],
        'sunday': [7],
    }
    if event_type not in like_days:
        raise ValueError(
            "event_type must be one of %s, got %r"
            % (sorted(like_days), event_type)
        )
    days = like_days[event_type]

    start = Timestamp(event_start)
    end = Timestamp(event_end)
    # 'h' replaces the deprecated 'H' hourly alias
    event_hours = date_range(start, end, freq = 'h').hour.tolist()
    event_hours = event_hours[:-1]  # accounting for hour ending

    # get max lookback days: the `look_back` calendar days before the event
    window_start = start.date() - Timedelta(days = look_back)
    datelist = date_range(window_start, periods = look_back).date.tolist()
    data = meter[meter.date.isin(datelist)]

    # get the seed values: a day is "low usage" when its mean event-hour load
    # is below 25% of the highest hourly value in the window
    seed_data = data[data.hour.isin(event_hours)]
    seed_data = seed_data[seed_data['date'] != start.date()]
    seed_data = seed_data.groupby(['date','hour']).mean().reset_index()
    seed_value = seed_data['kW'].max()*0.25

    # identify the low usage days
    low_usage = seed_data.groupby(['date']).mean()
    low_usage_dates = low_usage[low_usage.kW < seed_value].index.tolist()

    # days whose weekday does not match the event type
    rm_day = list({d for d in seed_data.date.to_list()
                   if d.isoweekday() not in days})

    # get dates and holidays to exclude; copy so that the list returned by
    # get_holidays is not mutated
    exclude = list(get_holidays(start.year))
    exclude.extend(low_usage_dates)
    exclude.append(start.date() - Timedelta(days = 1))
    exclude.extend(rm_day)

    # get cbl basis days: the (at most) 10 most recent eligible days
    max_days = seed_data.date.unique().tolist()
    days_to_keep = sorted((d for d in max_days if d not in exclude),
                          reverse = True)
    cbl_basis = days_to_keep[:10]

    # get averages and rank them, pick the top 5 of the averages
    averages = seed_data.groupby('date').mean()
    # .copy() so the rank column is added to an independent frame
    averages = averages[averages.index.isin(cbl_basis)].copy()
    averages['rank'] = averages['kW'].rank(ascending = False)
    baseline_dates = averages[averages['rank'] <= 5].index.tolist()

    # calculate baseline as average of the hours for the selected days
    baseline = data[data.date.isin(baseline_dates)]
    baseline = baseline.groupby('hour').mean()

    # actual values during event day. The end of the day is floor + 1 day
    # rather than ceil, because ceil equals floor when the event starts
    # exactly at midnight and the selection would then be empty.
    day_start = start.floor('24h')
    day_end = day_start + Timedelta(days = 1)
    event_day = meter[(meter.dttm >= day_start) & (meter.dttm < day_end)]
    event_day = event_day.groupby(['id','hour']).mean().reset_index()
    # NOTE(review): this assignment aligns on index labels - event_day has a
    # fresh 0..n-1 index while baseline is indexed by hour. Confirm that is
    # the intended pairing (it looks right only for a single id with hours
    # 0..23 all present).
    event_day['baseline'] = baseline.kW

    # get adjustment factor
    gaf = weather_adjustment(start = start, end = end, meter = meter,
                             basis_dates = cbl_basis)

    # get the adjusted baseline
    event_day['adjustment'] = event_day.baseline * gaf.kW

    # calculate the event performance per hour
    perf = perf_calc(event_day, event_hours)

    return event_day, perf
def round_value(self, value: pd.Timestamp) -> pd.Timestamp:
    """Floor ``value`` down to a multiple of ``self._round_step``.

    The step is converted to whole seconds (any fractional part is
    truncated) before being used as the flooring frequency.
    """
    logger.debug(f"Rounding value: {value}")
    step_seconds = int(self._round_step.total_seconds())
    return value.floor(f"{step_seconds}s")
def sync(session, dataset: str, variables: List[str], start: pd.Timestamp, end: pd.Timestamp,
         debug: bool, force: bool):
    """Replace the stored observations of ``dataset`` between ``start`` and ``end``.

    Existing ``Observation`` rows for the dataset/variables/time range are
    deleted, fresh rows from ``retrieve`` are added, and the timestamps seen
    are checked against the ones expected for the dataset's ``FREQUENCY``.

    :param session: database session used for the delete, adds and commit.
    :param dataset: dataset name; also the key into ``FREQUENCY``.
    :param variables: row keys to store as separate observations.
    :param start: start of the range.
    :param end: end of the range.
    :param debug: print every retrieved row and skip the final commit.
    :param force: tolerate missing timestamps anywhere in the range instead
        of only at its tail.
    :raises AssertionError: if timestamps are missing other than at the tail
        (and ``force`` is false), or if unexpected timestamps were retrieved.
        Nothing is committed in that case.
    """
    session.query(Observation).where(
        Observation.timestamp.between(start, end),
        Observation.dataset == dataset,
        Observation.variable.in_(variables)
    ).delete(synchronize_session=False)

    obs_count = row_count = 0
    timestamps = set()
    latest_timestamp = start
    for row_count, row in enumerate(retrieve(dataset, variables, start, end), start=1):
        if debug:
            print(f'{dataset}:', ' '.join(f'{k}={v}' for k, v in row.items()))
        for variable in variables:
            value = row[variable]
            # '' and 'x' carry no observation for this variable
            if value in ('', 'x'):
                continue
            # 'tr' is stored as zero (presumably "trace" -- TODO confirm)
            if value == 'tr':
                value = 0
            # Only timestamps that produced at least one observation are
            # recorded, so a row of blanks still counts as missing below.
            timestamp = row['TimeStamp']
            timestamps.add(timestamp)
            latest_timestamp = max(latest_timestamp, timestamp)
            session.add(Observation(
                timestamp=timestamp,
                dataset=dataset,
                variable=variable,
                value=value
            ))
            obs_count += 1

    print(f'{dataset}: {row_count} rows giving {obs_count} observations, '
          f'latest at {latest_timestamp}')

    frequency = FREQUENCY[dataset]
    start = pd.Timestamp(start)
    end = pd.Timestamp(end)
    if frequency == 'D':
        expected_timestamps = datetime_range(start, end)
    else:
        # Nudge start forward one second before ceiling, so a start that sits
        # exactly on a boundary is not itself expected. Spelled with the
        # ``seconds=`` keyword because the 'S' unit alias is deprecated.
        start = (start + pd.Timedelta(seconds=1)).ceil(frequency)
        end = end.floor(frequency)
        expected_timestamps = pd.date_range(start, end, freq=frequency)

    missing = set(expected_timestamps) - timestamps
    if missing:
        missing_text = ', '.join(str(m) for m in sorted(missing))
        message = f'{len(missing)} missing for {dataset}: {missing_text}'
        # A gap made up solely of the most recent expected timestamps is
        # tolerated, as is any gap when ``force`` is set.
        if sorted(missing) == list(expected_timestamps[-len(missing):]) or force:
            if len(missing) > 1:
                print('WARNING '+message)
        else:
            raise AssertionError(message)

    unexpected = timestamps - set(expected_timestamps)
    if unexpected:
        raise AssertionError('Unexpected: '+', '.join(str(m) for m in sorted(unexpected)))

    if not debug:
        session.commit()
def test_round(self): # round dt = Timestamp('20130101 09:10:11') result = dt.round('D') expected = Timestamp('20130101') self.assertEqual(result, expected) dt = Timestamp('20130101 19:10:11') result = dt.round('D') expected = Timestamp('20130102') self.assertEqual(result, expected) dt = Timestamp('20130201 12:00:00') result = dt.round('D') expected = Timestamp('20130202') self.assertEqual(result, expected) dt = Timestamp('20130104 12:00:00') result = dt.round('D') expected = Timestamp('20130105') self.assertEqual(result, expected) dt = Timestamp('20130104 12:32:00') result = dt.round('30Min') expected = Timestamp('20130104 12:30:00') self.assertEqual(result, expected) dti = date_range('20130101 09:10:11', periods=5) result = dti.round('D') expected = date_range('20130101', periods=5) tm.assert_index_equal(result, expected) # floor dt = Timestamp('20130101 09:10:11') result = dt.floor('D') expected = Timestamp('20130101') self.assertEqual(result, expected) # ceil dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') expected = Timestamp('20130102') self.assertEqual(result, expected) # round with tz dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('D') expected = Timestamp('20130101', tz='US/Eastern') self.assertEqual(result, expected) dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('s') self.assertEqual(result, dt) dti = date_range('20130101 09:10:11', periods=5).tz_localize('UTC').tz_convert('US/Eastern') result = dti.round('D') expected = date_range('20130101', periods=5).tz_localize('US/Eastern') tm.assert_index_equal(result, expected) result = dti.round('s') tm.assert_index_equal(result, dti) # invalid for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq))