def test_frame_tz_convert(self): rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') df = DataFrame({'a': 1}, index=rng) result = df.tz_convert('Europe/Berlin') expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) assert result.index.tz.zone == 'Europe/Berlin' tm.assert_frame_equal(result, expected) df = df.T result = df.tz_convert('Europe/Berlin', axis=1) assert result.columns.tz.zone == 'Europe/Berlin' tm.assert_frame_equal(result, expected.T)
def test_frame_tz_convert(self): rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") df = DataFrame({"a": 1}, index=rng) result = df.tz_convert("Europe/Berlin") expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) assert result.index.tz.zone == "Europe/Berlin" tm.assert_frame_equal(result, expected) df = df.T result = df.tz_convert("Europe/Berlin", axis=1) assert result.columns.tz.zone == "Europe/Berlin" tm.assert_frame_equal(result, expected.T)
def test_frame_align_aware(self):
    """Aligning tz-aware objects keeps the tz; mixed timezones align on UTC.

    BUG FIX: the Series-with-frame align result was previously discarded
    (``df1[0].align(...)`` without assignment), so the asserts that followed
    re-checked the stale ``new1``/``new2`` from the frame-with-Series case.
    """
    idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
    idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern")
    df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
    df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)
    new1, new2 = df1.align(df2)
    assert df1.index.tz == new1.index.tz
    assert df2.index.tz == new2.index.tz

    # different timezones convert to UTC

    # frame with frame
    df1_central = df1.tz_convert("US/Central")
    new1, new2 = df1.align(df1_central)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC

    # frame with Series
    new1, new2 = df1.align(df1_central[0], axis=0)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC

    # Series with frame — assign the result so the asserts check it.
    new1, new2 = df1[0].align(df1_central, axis=0)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC
def test_frame_align_aware(self):
    """Aligning tz-aware objects keeps the tz; mixed timezones align on UTC.

    BUG FIX: the Series-with-frame align result was previously discarded
    (``df1[0].align(...)`` without assignment), so the asserts that followed
    re-checked the stale ``new1``/``new2`` from the frame-with-Series case.
    """
    idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern')
    idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern')
    df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
    df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)
    new1, new2 = df1.align(df2)
    assert df1.index.tz == new1.index.tz
    assert df2.index.tz == new2.index.tz

    # different timezones convert to UTC

    # frame with frame
    df1_central = df1.tz_convert('US/Central')
    new1, new2 = df1.align(df1_central)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC

    # frame with Series
    new1, new2 = df1.align(df1_central[0], axis=0)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC

    # Series with frame — assign the result so the asserts check it.
    new1, new2 = df1[0].align(df1_central, axis=0)
    assert new1.index.tz == pytz.UTC
    assert new2.index.tz == pytz.UTC
def update(self, df_in: pd.DataFrame, symbol: str = None, datatype: str = None,
           barsize: str = None, tz: str = None, standardize_index=True):
    """Merge ``df_in`` into ``self.df``.

    Overlapped data will be overwritten by non-null values of the input;
    indexes and columns will be unioned.

    :param df_in: input data; must be a pandas.DataFrame.
    :param symbol: fallback Symbol value passed to ``_standardize_index``
        when the input lacks that column.
    :param datatype: fallback DataType value, same mechanism as ``symbol``.
    :param barsize: fallback BarSize value, same mechanism as ``symbol``.
    :param tz: timezone used by ``_standardize_index`` to localize or
        convert a naive/non-pytz datetime level.
    :param standardize_index: when True, normalize the input's index to the
        block's standard MultiIndex before merging.
    :returns: ``self``, enabling call chaining.
    :raises TypeError: if ``df_in`` is not a DataFrame.
    """
    # Check input data type
    if not (isinstance(df_in, pd.DataFrame)):
        raise TypeError('Input data must be a pandas.DataFrame.')

    # Check empty data: nothing to merge, keep current state.
    if df_in.empty:
        return self

    # Standardize index (on a copy, so the caller's frame is untouched).
    if standardize_index:
        df_in = self._standardize_index(
            df_in.copy(), symbol=symbol, datatype=datatype,
            barsize=barsize, tz=tz)

    # Combine input DataFrame with internal self.df
    if self.df.empty:
        # Initialize self.df
        self.df = df_in.sort_index()
    else:
        # Align the input's datetime level to the block's timezone so
        # overlapping timestamps line up before combine_first.
        df_in = df_in.tz_convert(self.tzinfo, level=self.__class__.dtlevel)
        self.df = df_in.combine_first(self.df).sort_index()

    # Post-combination processing:
    # fill NaN, and enforce barcount and volume columns dtype to int64.
    self.df.fillna(-1, inplace=True)
    for col in self.df.columns:
        if col.lower() in ('barcount', 'volume'):
            self.df[col] = self.df[col].astype(np.int64)

    # BUG FIX: previously this path fell through returning None while the
    # empty-input branch returned self; return self consistently.
    return self
def test_tz_convert(self):
    """tz_convert to UTC relabels Series index, frame index, and columns (axis=1).

    NOTE(review): modernized from ``self.assert_`` — a deprecated unittest
    alias removed in Python 3.12 — to plain ``assert`` statements; no
    behavior checked by the test changes. This test relies on legacy pandas
    accepting ``tz_convert`` on a naive index — TODO confirm target version.
    """
    rng = date_range('1/1/2011', periods=100, freq='H')
    ts = Series(1, index=rng)
    result = ts.tz_convert('utc')
    assert result.index.tz.zone == 'UTC'

    df = DataFrame({'a': 1}, index=rng)
    result = df.tz_convert('utc')
    expected = DataFrame({'a': 1}, rng.tz_convert('UTC'))
    assert result.index.tz.zone == 'UTC'
    assert_frame_equal(result, expected)

    df = df.T
    result = df.tz_convert('utc', axis=1)
    assert result.columns.tz.zone == 'UTC'
    assert_frame_equal(result, expected.T)
def test_frame_add_tz_mismatch_converts_to_utc(self):
    """Adding frames whose indexes carry different timezones yields a UTC index."""
    rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
    df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"])
    df_moscow = df.tz_convert("Europe/Moscow")

    # Both operand orders must land on UTC.
    for left, right in ((df, df_moscow), (df_moscow, df)):
        total = left + right
        assert total.index.tz is pytz.utc
def test_frame_add_tz_mismatch_converts_to_utc(self):
    """Frame addition across mismatched timezones joins the indexes on UTC."""
    eastern = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
    frame = DataFrame(np.random.randn(len(eastern)), index=eastern, columns=['a'])
    frame_moscow = frame.tz_convert('Europe/Moscow')

    summed = frame + frame_moscow
    assert summed.index.tz is pytz.utc

    # Reversed operand order behaves identically.
    summed = frame_moscow + frame
    assert summed.index.tz is pytz.utc
def test_to_records_datetimeindex_with_tz(self, tz): # GH13937 dr = date_range('2016-01-01', periods=10, freq='S', tz=tz) df = DataFrame({'datetime': dr}, index=dr) expected = df.to_records() result = df.tz_convert("UTC").to_records() # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected)
def test_tz_convert_axis1(self): rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") obj = DataFrame({"a": 1}, index=rng) obj = obj.T result = obj.tz_convert("Europe/Berlin", axis=1) assert result.columns.tz.zone == "Europe/Berlin" expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) tm.assert_equal(result, expected.T)
def test_tz_convert(self, frame_or_series):
    """tz_convert relabels the index timezone for both DataFrame and Series."""
    rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
    obj = tm.get_obj(DataFrame({"a": 1}, index=rng), frame_or_series)

    result = obj.tz_convert("Europe/Berlin")

    expected = tm.get_obj(
        DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")), frame_or_series
    )
    assert result.index.tz.zone == "Europe/Berlin"
    tm.assert_equal(result, expected)
def _standardize_index(self, df_in: pd.DataFrame, symbol: str = None, datatype: str = None, barsize: str = None, tz: str = None): """Normalize input DataFrame index to MarketDataBlock standard. """ # Add or starndardize index names in the input. if isinstance(df_in.index, pd.MultiIndex): df_in.reset_index(inplace=True) # Rename ambiguous column names. df_in.columns = [ col_rename.get(col.strip().lower(), col.strip().lower()) for col in df_in.columns ] # Insert Symbol, DataType, Barsize columns from arguments if not # found in the input dataframe. for col in MarketDataBlock.data_index: if col not in df_in.columns: if locals().get(col.lower(), None) is None: raise KeyError( 'No {0} argument and no {0} column in the DataFrame.'. format(col)) df_in.insert(0, col, locals()[col.lower()]) # Convert datetime strings to pandas DatetimeIndex df_in['TickerTime'] = pd.DatetimeIndex(df_in['TickerTime'].apply( pd.Timestamp)) # Standardize BarSize strings df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize) # Set index to class-defined MultiIndex df_in.set_index(MarketDataBlock.data_index, inplace=True) # Set time zone so all DatetimeIndex are tz-aware df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz if df_in_tz is None or isinstance(df_in_tz, timezone) or \ isinstance(df_in_tz, pytz._FixedOffset): # Input df has naive time index, or tzinfo is not pytz.timezone() if tz is None: raise ValueError( 'Argument tz=None, and TickerTime.tzinfo is None(naive),' 'datetime.timezone, or pytz._FixedOffset.') if df_in_tz is None: df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel) else: df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel) return df_in
def test_series_frame_tz_convert(self): rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') ts = Series(1, index=rng) result = ts.tz_convert('Europe/Berlin') self.assert_(result.index.tz.zone == 'Europe/Berlin') df = DataFrame({'a': 1}, index=rng) result = df.tz_convert('Europe/Berlin') expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) self.assert_(result.index.tz.zone == 'Europe/Berlin') assert_frame_equal(result, expected) df = df.T result = df.tz_convert('Europe/Berlin', axis=1) self.assert_(result.columns.tz.zone == 'Europe/Berlin') assert_frame_equal(result, expected.T) # can't convert tz-naive rng = date_range('1/1/2011', periods=200, freq='D') ts = Series(1, index=rng) self.assertRaises(Exception, ts.tz_convert, 'US/Eastern')
def test_series_frame_tz_convert(self): rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") ts = Series(1, index=rng) result = ts.tz_convert("Europe/Berlin") self.assertEqual(result.index.tz.zone, "Europe/Berlin") df = DataFrame({"a": 1}, index=rng) result = df.tz_convert("Europe/Berlin") expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) self.assertEqual(result.index.tz.zone, "Europe/Berlin") assert_frame_equal(result, expected) df = df.T result = df.tz_convert("Europe/Berlin", axis=1) self.assertEqual(result.columns.tz.zone, "Europe/Berlin") assert_frame_equal(result, expected.T) # can't convert tz-naive rng = date_range("1/1/2011", periods=200, freq="D") ts = Series(1, index=rng) tm.assertRaisesRegexp(TypeError, "Cannot convert tz-naive", ts.tz_convert, "US/Eastern")
def test_tz_convert(self, frame_or_series): rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") obj = DataFrame({"a": 1}, index=rng) if frame_or_series is not DataFrame: obj = obj["a"] result = obj.tz_convert("Europe/Berlin") expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) if frame_or_series is not DataFrame: expected = expected["a"] assert result.index.tz.zone == "Europe/Berlin" tm.assert_equal(result, expected)
def _standardize_index( self, df_in: pd.DataFrame, symbol: str=None, datatype: str=None, barsize: str=None, tz: str=None): """Normalize input DataFrame index to MarketDataBlock standard. """ # Add or starndardize index names in the input. if isinstance(df_in.index, pd.MultiIndex): df_in.reset_index(inplace=True) # Rename ambiguous column names. df_in.columns = [ col_rename.get(col.strip().lower(), col.strip().lower()) for col in df_in.columns] # Insert Symbol, DataType, Barsize columns from arguments if not # found in the input dataframe. for col in MarketDataBlock.data_index: if col not in df_in.columns: if locals().get(col.lower(), None) is None: raise KeyError( 'No {0} argument and no {0} column in the DataFrame.' .format(col)) df_in.insert(0, col, locals()[col.lower()]) # Convert datetime strings to pandas DatetimeIndex df_in['TickerTime'] = pd.DatetimeIndex( df_in['TickerTime'].apply(pd.Timestamp)) # Standardize BarSize strings df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize) # Set index to class-defined MultiIndex df_in.set_index(MarketDataBlock.data_index, inplace=True) # Set time zone so all DatetimeIndex are tz-aware df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz if df_in_tz is None or isinstance(df_in_tz, timezone) or \ isinstance(df_in_tz, pytz._FixedOffset): # Input df has naive time index, or tzinfo is not pytz.timezone() if tz is None: raise ValueError( 'Argument tz=None, and TickerTime.tzinfo is None(naive),' 'datetime.timezone, or pytz._FixedOffset.') if df_in_tz is None: df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel) else: df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel) return df_in
def test_equal_join_ensure_utc(self):
    """Arithmetic between objects with different timezones joins on UTC."""
    rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts_moscow = ts.tz_convert("Europe/Moscow")

    # Series + Series, both operand orders.
    self.assertIs((ts + ts_moscow).index.tz, pytz.utc)
    self.assertIs((ts_moscow + ts).index.tz, pytz.utc)

    # Frame + frame, both operand orders.
    df = DataFrame({"a": ts})
    df_moscow = df.tz_convert("Europe/Moscow")
    self.assertIs((df + df_moscow).index.tz, pytz.utc)
    self.assertIs((df_moscow + df).index.tz, pytz.utc)
def test_equal_join_ensure_utc(self):
    """Arithmetic between objects with different timezones joins on UTC.

    NOTE(review): modernized from ``self.assert_`` — a deprecated unittest
    alias removed in Python 3.12 — to plain ``assert`` statements; the
    checked behavior is unchanged.
    """
    rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(len(rng)), index=rng)
    ts_moscow = ts.tz_convert('Europe/Moscow')
    result = ts + ts_moscow
    assert result.index.tz is pytz.utc
    result = ts_moscow + ts
    assert result.index.tz is pytz.utc

    df = DataFrame({'a': ts})
    df_moscow = df.tz_convert('Europe/Moscow')
    result = df + df_moscow
    assert result.index.tz is pytz.utc
    result = df_moscow + df
    assert result.index.tz is pytz.utc
def update(self, df_in: pd.DataFrame, symbol: str = None, datatype: str = None, barsize: str = None, tz: str = None, standardize_index=True): """ Input data is combined with self.df. Overlapped data will be overwritten by non-null values of input data. Indexes and Columns will be unioned. """ # Check input data type if not (isinstance(df_in, pd.DataFrame)): raise TypeError('Input data must be a pandas.DataFrame.') # Check empty data if df_in.empty: return self # Standardize index if standardize_index: df_in = self._standardize_index(df_in.copy(), symbol=symbol, datatype=datatype, barsize=barsize, tz=tz) # Combine input DataFrame with internal self.df if self.df.empty: # Initialize self.df self.df = df_in.sort_index() else: df_in = df_in.tz_convert(self.tzinfo, level=self.__class__.dtlevel) self.df = df_in.combine_first(self.df).sort_index() # Post-combination processing # Fill NaN, and enforce barcount and volume columns dtype to int64 self.df.fillna(-1, inplace=True) for col in self.df.columns: if col.lower() in ('barcount', 'volume'): self.df[col] = self.df[col].astype(np.int64)
class Timeseries(object):
    def __init__(self, data):
        """Can be called with either:

        - A DataFrame. Preferred.
        - timeseries_dict, a dict with UTC datetimes as keys and floats
          as values.
        - A list of such dicts.

        This works like a pandas DataFrame, except we keep track of the
        order of column names.
        """
        if isinstance(data, DataFrame):
            self._dataframe = data
            self._columns = tuple(data.columns)
        elif isinstance(data, dict):
            series = Series(data)
            self._dataframe = DataFrame({'data': series})
            self._columns = ('data',)
        else:
            # A list of dicts: generate a 'data_<i>' column per entry.
            self._dataframe = DataFrame(dict([
                ('data_{0}'.format(i), series)
                for i, series in enumerate(data)]))
            self._columns = tuple(
                'data_{0}'.format(i) for i, series in enumerate(data))

    def add(self, timeseries):
        """Add the columns from timeseries to the dataframe of this
        timeseries."""
        # NOTE(review): DataFrame.combineAdd was removed from modern
        # pandas — this code targets a legacy pandas version; confirm.
        self._dataframe = self._dataframe.combineAdd(timeseries._dataframe)
        self._columns = self.columns + timeseries.columns

    @property
    def dataframe(self):
        # Underlying pandas DataFrame holding all columns.
        return self._dataframe

    @property
    def timeseries(self):
        """Return the first of the series in dataframe"""
        return self._dataframe[self._columns[0]].dropna()

    def get_series(self, columnname):
        # Single column with NaN rows dropped.
        return self._dataframe[columnname].dropna()

    def to_csv(self, outfile, sep=',', timezone=None,
               date_format='%Y-%m-%d %H:%M',
               header_date_format='Datum + tijd'):
        """Note: changes the timezone of all datetimes!

        Write the data of all timeseries to a CSV file."""
        if timezone is not None:
            self.set_timezone(timezone)
        # Header row first, then append the data rows (mode='a').
        headers = [header_date_format] + [
            self.label_and_unit(column) for column in self.columns]
        outfile.write(sep.join(headers) + "\n")
        self._dataframe.to_csv(outfile, sep=sep, mode='a', header=None,
                               date_format=date_format)

    def set_timezone(self, timezone):
        """Sets this timezone on all datetimes.

        Timezone is a pytz timezone object."""
        self._dataframe = self._dataframe.tz_convert(timezone)

    @property
    def columns(self):
        # Column names in their original insertion order.
        return self._columns

    def label(self, series_name):
        """Only the part of the columns before '||'."""
        return series_name.split('||')[0]

    def unit(self, series_name):
        """Only the part of the columns after '||', or None."""
        return series_name.split('||')[1] if '||' in series_name else None

    def label_and_unit(self, series_name):
        # "label (unit)" when a unit is present, otherwise just the label.
        unit = self.unit(series_name)
        if unit:
            return "{} ({})".format(self.label(series_name), unit)
        else:
            return self.label(series_name)

    def dates(self):
        # Index values (datetimes) of the primary series.
        return self.timeseries.keys()

    def values(self):
        # Data values of the primary series, as a plain list.
        return list(self.timeseries)

    def latest(self):
        # Last observation of the primary series.
        return self.timeseries.tail(1)

    def data(self):
        # NOTE(review): izip is the Python 2 itertools name — this module
        # appears to target Python 2 (or imports a compat alias); confirm.
        return [[key, value]
                for key, value in izip(self.dates(), self.values())]

    def __len__(self):
        return len(self._dataframe) if self._dataframe is not None else 0
def convert_timezone(self, df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with its tz-aware index converted to the local timezone."""
    localized = df.tz_convert(self.local_tz)
    return localized
def save_to_gpx(nav_df: pd.DataFrame, fileOutPN, gpx_obj_namef=None,
                waypoint_symbf=None, cfg_proc=None, gpx=None):
    """Save navigation from dataframe to *.gpx file: track or waypoints.

    Generate waypoints names and selects symbols from
    cfg['out']['gpx_symbols'] based on current row in nav_df.

    :param nav_df: DataFrame with fields: if waypoint_symbf: itbl, ...
    :param fileOutPN: *.gpx file full name without extension. Set None to
        not write (useful if need only gpx)
    :param gpx_obj_namef: str or fun(waypoint number). If None then we set
        it to fileOutPN.stem
    :param waypoint_symbf: str or fun(nav_df record = row). If None saves
        track
    :param cfg_proc:
        'simplify_tracks_error_m'
        'dt_per_file'
        'b_missed_coord_to_zeros'
        period_segments or period_tracks: to split track by this in one file
    :param gpx: gpx object to update. If None (default) then will be
        created here, updated and saved
    :return: the gpx object (None on empty input)
    """
    # Nothing to export.
    if nav_df.empty:
        l.warning('no data')
        return

    # Defaults: object name from the output file stem; minimal cfg_proc.
    if gpx_obj_namef is None:
        gpx_obj_namef = Path(fileOutPN).stem
    if cfg_proc is None:
        cfg_proc = {'dt_per_file': None}
    elif not 'dt_per_file' in cfg_proc:
        cfg_proc['dt_per_file'] = None
    if gpx is None:
        gpx = GPX()

    if waypoint_symbf:
        # Waypoint mode: one GPXWaypoint per row.
        # A plain string symbol becomes a constant-symbol function.
        if isinstance(waypoint_symbf, str):
            s = waypoint_symbf
            waypoint_symbf = lambda x: s
        b_useDepEcho = 'DepEcho' in nav_df.columns and any(nav_df['DepEcho'])
        w_names = set()  # names already used, for duplicate suffixing
        # w_name = None  # same purpose for not all conditions but faster
        # nav_dft = nav_df.reset_index().set_index('itbl', drop=False, append=True)  #, inplace=True
        # for t in range(nav_dft.itbl.min(), nav_dft.itbl.max()+1):  # .ptp() = -
        for t, nav_dft in nav_df.groupby(['itbl']):  # .reset_index()
            for i, r in enumerate(nav_dft.itertuples()):  # .loc[t]
                name = None
                str_time_short = '{:%d %H:%M}'.format(r.Index.to_pydatetime())
                timeUTC = r.Index.tz_convert(None).to_pydatetime()
                str_time_long = '{:%d.%m.%y %H:%M:%S}'.format(timeUTC)
                name = gpx_obj_namef if isinstance(gpx_obj_namef, str) else gpx_obj_namef(i, r, t)

                # remove duplicates by add letter: append 'a', 'b', ...
                # until the candidate name is unused.
                name_test_dup = name
                i_dup = 0
                while name_test_dup in w_names:  # name == w_name or :
                    name_test_dup = name + chr(97 + i_dup)  # chr(97) = 'a'
                    i_dup += 1
                else:
                    name = name_test_dup
                w_names.add(name)

                gpx_waypoint = GPXWaypoint(
                    latitude=r.Lat,
                    longitude=r.Lon,
                    time=timeUTC,
                    description=str_time_long,
                    comment=str_time_short,
                    name=name,
                    symbol=waypoint_symbf(r),
                    elevation=-r.DepEcho if b_useDepEcho and np.isfinite(
                        r.DepEcho) else None)  # , description=, type=, comment=
                # if not i_dup:
                #     w_name = name  # to check duplicates on next cycle
                gpx.waypoints.append(gpx_waypoint)

        if isinstance(gpx_obj_namef, str):
            gpx.description = gpx_obj_namef
        if fileOutPN:
            gpx.author_email = '*****@*****.**'
            write_file(fileOutPN, gpx.to_xml())
    else:  # tracks
        # loc = np.zeros_like(nav_df.index, dtype=int)
        # Lat = np.zeros_like(nav_df.index, dtype=np.float64)
        # Lon = np.zeros_like(nav_df.index, dtype=np.float64)
        # T = np.zeros_like(nav_df.index, dtype=pd.Timedelta)
        b_have_depth = ('DepEcho' in nav_df.columns)
        # b_have_speed = ('Speed' in nav_df.columns)

        # Split the data into time intervals: one segment (or track) each.
        period_split = cfg_proc.get('period_segments') or cfg_proc.get('period_tracks')
        if period_split:
            period_split = pd_period_to_timedelta(period_split)
            t_intervals_start = pd.date_range(
                start=nav_df.index[0].normalize(),
                end=max(nav_df.index[-1],
                        nav_df.index[-1].normalize() + period_split),
                freq=period_split)[1:]  # make last t_interval_start >= all_data[-1]
            # format_time =
        else:
            t_intervals_start = nav_df.index[-1:]  # series with 1 last value
        t_interval_end = nav_df.index[0]
        n_intervals_without_data = 0
        part = 0
        nav_df = nav_df.tz_convert('utc', copy=False)
        Tprev = nav_df.index[0].to_pydatetime()
        Tcur = Tprev
        if not cfg_proc.get('period_tracks'):
            # Single track reused for all segments.
            gpx_track = gpx_track_create(gpx, gpx_obj_namef)
        for t_interval_start in t_intervals_start:
            t_interval = slice(t_interval_end, t_interval_start)  # from previous last
            # USEtime = [[t_interval_end.isoformat(), t_interval_start.isoformat()]]
            nav_df_cur = nav_df.truncate(t_interval_end, t_interval_start, copy=True)
            t_interval_end = t_interval_start
            # load_interval
            if not len(nav_df_cur):
                print('empty interval')
                n_intervals_without_data += 1
                if n_intervals_without_data > 30:
                    print('30 intervals without data => think it is the end')
                    break
                continue
            gpx_segment = GPXTrackSegment()
            if cfg_proc.get('period_tracks'):
                # One track per interval, named by its start time (date
                # only when the interval starts at midnight).
                fmt = '%y-%m-%d' if t_interval_start.second == 0 and t_interval_start.hour == 0 else '%y-%m-%d %H:%M'
                track_name = f'{gpx_obj_namef} {t_interval_start:{fmt}}'
                gpx_track = gpx_track_create(gpx, track_name)
                gpx_track[track_name].segments.append(gpx_segment)
            else:
                gpx_track[gpx_obj_namef].segments.append(gpx_segment)
            for i, r in enumerate(nav_df_cur.itertuples()):
                Tcur = r.Index.to_pydatetime()
                gpx_point = GPXTrackPoint(
                    latitude=r.Lat, longitude=r.Lon,
                    elevation=r.DepEcho if b_have_depth and not np.isnan(
                        r.DepEcho) else None,
                    time=Tcur)  # , speed= speed_b, comment= Comment
                gpx_segment.points.append(gpx_point)
            # if i == 1:
            #     gpx.description = gpx_obj_namef
            #     gpx.author_email = '*****@*****.**'
            #     gpxxml = gpx.to_xml()
            #     tree = ET.parse(gpxxml)
            #     root = tree.getroot()
            if cfg_proc.get('simplify_tracks_error_m'):
                # Reduce point count; simplification is recursive, so on
                # RecursionError retry once with a 10x recursion limit.
                try:
                    gpx_segment.points = gpxpy_simplify_polyline(
                        gpx_segment.points,
                        cfg_proc['simplify_tracks_error_m'])
                except RecursionError as e:
                    recursion_limit = sys.getrecursionlimit()
                    l.error('Check time in data! Why increasing old recursion limit (%s) is needed? Trying: x10...',
                            recursion_limit)
                    try:
                        sys.setrecursionlimit(recursion_limit * 10)
                        gpx_segment.points = gpxpy_simplify_polyline(
                            gpx_segment.points,
                            cfg_proc['simplify_tracks_error_m'])
                        l.warning('now track simplified successfuly')
                    except Exception as e:
                        l.exception('not succes. skip simplifying tracks',
                                    recursion_limit)
            if cfg_proc['dt_per_file'] and Tcur - Tprev > cfg_proc['dt_per_file']:
                # save to next file
                part += 1
                if fileOutPN:
                    gpx_proc_and_save(gpx, gpx_obj_namef, cfg_proc,
                                      f'{fileOutPN}part{part}')
                gpx_track = gpx_track_create(gpx, gpx_obj_namef)
                Tprev = Tcur
        if fileOutPN:
            gpx_proc_and_save(gpx, gpx_obj_namef, cfg_proc, fileOutPN)
    return gpx