def test_daylight_savings(self):
    # 2004 daylight savings switches:
    # Sunday 2004-04-04 and Sunday 2004-10-31

    # make sure there's no weirdness around calculating the next day's
    # session's open time.
    for date in ["2004-04-05", "2004-11-01"]:
        next_day = pd.Timestamp(date, tz='UTC')
        open_date = next_day + Timedelta(days=self.calendar.open_offset)

        the_open = self.calendar.schedule.loc[next_day].market_open
        localized_open = the_open.tz_localize("UTC").tz_convert(
            self.calendar.tz
        )

        self.assertEqual(
            (open_date.year, open_date.month, open_date.day),
            (localized_open.year, localized_open.month, localized_open.day)
        )

        self.assertEqual(
            self.calendar.open_time.hour,
            localized_open.hour
        )

        self.assertEqual(
            self.calendar.open_time.minute,
            localized_open.minute
        )

def test_bar_count_for_simple_transforms(self):
    # July 2015
    # Su Mo Tu We Th Fr Sa
    #           1  2  3  4
    #  5  6  7  8  9 10 11
    # 12 13 14 15 16 17 18
    # 19 20 21 22 23 24 25
    # 26 27 28 29 30 31

    # half an hour into july 9, getting a 4-"day" window should get us
    # all the minutes of 7/6, 7/7, 7/8, and 31 minutes of 7/9
    july_9_dt = self.trading_calendar.open_and_close_for_session(
        pd.Timestamp("2015-07-09", tz='UTC')
    )[0] + Timedelta("30 minutes")

    self.assertEqual(
        (3 * 390) + 31,
        self.data_portal._get_minute_count_for_transform(july_9_dt, 4)
    )

    # November 2015
    # Su Mo Tu We Th Fr Sa
    #  1  2  3  4  5  6  7
    #  8  9 10 11 12 13 14
    # 15 16 17 18 19 20 21
    # 22 23 24 25 26 27 28
    # 29 30

    # nov 26th closed
    # nov 27th was an early close

    # half an hour into nov 30, getting a 4-"day" window should get us
    # all the minutes of 11/24, 11/25, 11/27 (half day!), and 31 minutes
    # of 11/30
    nov_30_dt = self.trading_calendar.open_and_close_for_session(
        pd.Timestamp("2015-11-30", tz='UTC')
    )[0] + Timedelta("30 minutes")

    self.assertEqual(
        390 + 390 + 210 + 31,
        self.data_portal._get_minute_count_for_transform(nov_30_dt, 4)
    )

def split2sessions(df):
    index = 0
    first_date = None
    last_date = None
    last_id = None
    sessions = []
    session = pd.DataFrame(columns=df.columns)
    textual_search = 'search videos. text:'
    start = len(textual_search)
    searched = False
    threshold = Timedelta(minutes=10)  # TODO 30 min
    for idx, row in enumerate(df.iterrows()):
        try:
            id, datetime, geo, user_id, companyid, label, items = row[1].values
            if idx % 1000 == 0:
                print('passed line #%d in split2sessions, # of search sessions is: %d, for id: %d' % (
                    idx, len(sessions), id))
            if label == 'search - search box' and textual_search in items and len(
                    items) > start + 2:
                searched = True
            # TODO fix time differences type, and check between FIRST interaction and CURRENT
            # if (last_date and last_date and datetime - last_date > threshold) or (last_id and last_id != user_id):
            if (first_date and datetime - first_date > threshold) or (
                    last_id and last_id != user_id):
                # a long idle gap or a new user starts a new session; keep the
                # previous one only if it contained a textual search
                if searched:
                    sessions.append(session)
                index = 0
                session = pd.DataFrame(columns=df.columns)
                searched = False
                first_date = None
            session.loc[index] = row[1]
            index += 1
            last_date = datetime
            last_id = user_id
            if not first_date:
                first_date = datetime
        except Exception as ex:
            print(ex)

    # append the last, still-open session
    if len(session) > 0:
        sessions.append(session)
    print('# of sessions: ' + str(len(sessions)))
    return sessions

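# Hedged usage sketch for split2sessions. The seven column names below are
# assumptions inferred from the tuple unpacking inside the function, not
# confirmed by the original source, and the toy rows are fabricated purely
# for illustration.
import pandas as pd
from pandas import Timestamp

toy = pd.DataFrame(
    [
        # a user issues a textual video search, then clicks a result
        [1, Timestamp("2020-01-01 10:00"), "US", 7, 42,
         "search - search box", "search videos. text: cats"],
        [2, Timestamp("2020-01-01 10:03"), "US", 7, 42,
         "click", "video result"],
        # a gap larger than the 10-minute threshold starts a new session
        [3, Timestamp("2020-01-01 10:20"), "US", 7, 42,
         "search - search box", "search videos. text: dogs"],
    ],
    columns=["id", "datetime", "geo", "user_id", "companyid", "label", "items"],
)

sessions = split2sessions(toy)
# Expected: two DataFrames -- the first two rows form one search session,
# the third row forms another.
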
def matplotlib_locator_formatter(timedelta, span=1):
    """
    Compute appropriate locator and formatter for renderers
    based on matplotlib, depending on designated time span.
    """
    from matplotlib.dates import date_ticker_factory, DateFormatter
    locator, formatter = date_ticker_factory(span)

    # http://pandas.pydata.org/pandas-docs/stable/timedeltas.html
    # https://stackoverflow.com/questions/16103238/pandas-timedelta-in-days
    is_macro = timedelta <= Timedelta(days=1)
    is_supermacro = timedelta <= Timedelta(minutes=5)

    if is_macro:
        # formatter = DateFormatter(fmt='%H:%M:%S.%f')
        formatter = DateFormatter(fmt='%H:%M')
    if is_supermacro:
        formatter = DateFormatter(fmt='%H:%M:%S')

    # Formatter overrides
    # if formatter.fmt == '%H:%M\n%b %d':
    #     formatter = DateFormatter(fmt='%Y-%m-%d %H:%M')

    # Labs
    # from matplotlib.dates import AutoDateLocator, AutoDateFormatter, HOURLY
    # locator = AutoDateLocator(maxticks=7)
    # locator.autoscale()
    # locator.intervald[HOURLY] = [5]
    # formatter = AutoDateFormatter(breaks)
    # formatter = date_format('%Y-%m-%d\n%H:%M')

    # Default building blocks
    # from matplotlib.dates import AutoDateFormatter, AutoDateLocator
    # locator = AutoDateLocator()
    # formatter = AutoDateFormatter(locator)

    return locator, formatter

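# Hedged usage sketch for matplotlib_locator_formatter: wiring the returned
# locator/formatter into a Matplotlib axis. The sample data and axis setup are
# illustrative assumptions, not part of the original source; span is taken to
# be the plotted extent expressed in days, matching date_ticker_factory.
import matplotlib.pyplot as plt
import pandas as pd

index = pd.date_range("2020-01-01 12:00", periods=120, freq="min")
series = pd.Series(range(120), index=index)

fig, ax = plt.subplots()
ax.plot(series.index, series.values)

extent = series.index[-1] - series.index[0]
# ~2 hours: under a day but over 5 minutes, so the '%H:%M' branch applies.
locator, formatter = matplotlib_locator_formatter(
    timedelta=extent,
    span=extent.total_seconds() / 86400.0,
)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
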
def test_interpolate_gps_time():
    filename = "data/serial-link-20150429-163230.log.json.hdf5"
    assert os.path.isfile(filename)
    with pd.HDFStore(filename) as store:
        idx = store.rover_spp.T.host_offset.reset_index()
        model = t.interpolate_gpst_model(idx)
        assert isinstance(model, pd.stats.ols.OLS)
        assert np.allclose([model.beta.x, model.beta.intercept],
                           [1.00000368376, -64.2579561376])
        init_offset = store.rover_spp.T.host_offset[0]
        init_date = store.rover_spp.T.index[0]
        f = lambda t1: t.apply_gps_time(t1 * t.MSEC_TO_SEC, init_date, model)
        dates = store.rover_logs.T.host_offset.apply(f)
        l = dates.tolist()
        start, end = l[0], l[-1]
        assert start == Timestamp("2015-04-29 23:32:55.272075")
        assert end == Timestamp("2015-04-29 23:57:46.457568")
        init_secs_offset \
            = store.rover_spp.T.host_offset[0] - store.rover_logs.T.index[0]
        assert np.allclose([init_secs_offset * t.MSEC_TO_SEC], [55.859])
        assert (init_date - start) == Timedelta('0 days 00:00:55.848925')
        assert (end - init_date) == Timedelta('0 days 00:23:55.336568')
        assert pd.DatetimeIndex(dates).is_monotonic_increasing
        assert dates.shape == (2457,)

def to_offset(freqstr):
    """
    Return DateOffset object from string representation or
    Timedelta object

    Examples
    --------
    >>> to_offset('5Min')
    Minute(5)
    """
    if freqstr is None:
        return None

    if isinstance(freqstr, DateOffset):
        return freqstr

    if isinstance(freqstr, tuple):
        name = freqstr[0]
        stride = freqstr[1]
        if isinstance(stride, compat.string_types):
            name, stride = stride, name
        name, _ = _base_and_stride(name)
        delta = get_offset(name) * stride

    elif isinstance(freqstr, timedelta):
        delta = None
        freqstr = Timedelta(freqstr)
        try:
            for name in freqstr.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freqstr.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except Exception:
            raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))

    else:
        delta = None
        stride_sign = None
        try:
            for stride, name, _ in opattern.findall(freqstr):
                offset = get_offset(name)
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith('-') else 1
                if not stride:
                    stride = 1
                stride = int(stride)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except Exception:
            raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))

    if delta is None:
        raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))

    return delta

def to_offset(freq):
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None

    Returns
    -------
    delta : DateOffset
        None if freq is None

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    pandas.DateOffset

    Examples
    --------
    >>> to_offset('5min')
    <5 * Minutes>

    >>> to_offset('1D1H')
    <25 * Hours>

    >>> to_offset(('W', 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, 'B'))
    <2 * BusinessDays>

    >>> to_offset(datetime.timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, DateOffset):
        return freq

    if isinstance(freq, tuple):
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, compat.string_types):
            name, stride = stride, name
        name, _ = _base_and_stride(name)
        delta = get_offset(name) * stride

    elif isinstance(freq, timedelta):
        delta = None
        freq = Timedelta(freq)
        try:
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except Exception:
            raise ValueError(_INVALID_FREQ_ERROR.format(freq))

    else:
        delta = None
        stride_sign = None
        try:
            splitted = re.split(opattern, freq)
            if splitted[-1] != '' and not splitted[-1].isspace():
                # the last element must be blank
                raise ValueError('last element must be blank')
            for sep, stride, name in zip(splitted[0::4],
                                         splitted[1::4],
                                         splitted[2::4]):
                if sep != '' and not sep.isspace():
                    raise ValueError('separator must be spaces')
                offset = get_offset(name)
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith('-') else 1
                if not stride:
                    stride = 1
                stride = int(stride)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except Exception:
            raise ValueError(_INVALID_FREQ_ERROR.format(freq))

    if delta is None:
        raise ValueError(_INVALID_FREQ_ERROR.format(freq))

    return delta

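# Hedged usage sketch for the timedelta branch of to_offset, using the public
# pandas entry point (pandas.tseries.frequencies.to_offset) rather than the
# internal copies above; exact reprs may differ slightly across pandas
# versions.
import datetime
import pandas as pd
from pandas.tseries.frequencies import to_offset as public_to_offset

# A plain datetime.timedelta (or a pandas Timedelta) is wrapped in a Timedelta,
# and its non-zero components are summed into a single tick offset.
print(public_to_offset(datetime.timedelta(days=1)))          # <Day>
print(public_to_offset(pd.Timedelta(hours=1, minutes=30)))   # e.g. <90 * Minutes>
print(public_to_offset('1D1H'))                              # <25 * Hours>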