/
timeseries.py
279 lines (220 loc) · 9.53 KB
/
timeseries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
import numpy as np
import pandas as pd
import logging
from pytesmo.time_series.anomaly import calc_climatology
from pytesmo.time_series.anomaly import calc_anomaly as calc_anom_pytesmo
def calc_anom(Ser, mode='climatological', window_size=35, return_clim=False, return_clim366=False):
    """
    Compute anomalies of a time series using the pytesmo anomaly routines.

    :param Ser: pandas.Series; index must be a datetime index
    :param mode: string; one of:
                 "climatological": anomalies from the mean seasonal cycle
                 "longterm": climatological anomalies minus short-term anomalies
                 "shortterm": anomalies computed without a climatology
                              (climatology=None is handed to pytesmo)
    :param window_size: integer; passed to pytesmo both as the climatology
                        smoothing window and as the anomaly window size
    :param return_clim: boolean; if true, the 'climatology' entry of the pytesmo
                        result is returned instead of anomalies ("mode" is ignored)
    :param return_clim366: boolean; if true, the climatology object itself is
                           returned (takes precedence over "mode" and "return_clim")
    :return: the requested Series, named like the input; None if "mode" is unknown
    """
    valid_modes = ('climatological', 'longterm', 'shortterm')
    if mode not in valid_modes:
        logging.error('calc_anom: unknown anomaly type')
        return None

    # A climatology is only needed when it is used for the anomalies or returned
    needs_clim = (mode != 'shortterm') | return_clim | return_clim366
    climatology = None
    if needs_clim:
        climatology = calc_climatology(
            Ser,
            respect_leap_years=True,
            wraparound=True,
            moving_avg_clim=window_size,
            fillna=False,
        )

    if return_clim366:
        return climatology

    # With a climatology: climatological anomalies; without: short-term anomalies
    result = calc_anom_pytesmo(
        Ser,
        climatology=climatology,
        window_size=window_size,
        return_clim=return_clim,
    )

    if return_clim:
        # Keep only the per-time-step climatology values
        result = result['climatology']
    elif mode == 'longterm':
        # Long-term = climatological anomalies minus short-term anomalies
        result -= calc_anom_pytesmo(Ser, climatology=None, window_size=window_size)

    result.name = Ser.name
    return result
def calc_anomaly(Ser, method='moving_average', output='anomaly', longterm=False, window_size=35, n=3):
    """
    Calculates anomalies of a time series relative to a day-of-year climatology.

    Parameters
    ----------
    Ser : pd.Series w. DatetimeIndex
        Timeseries of which the anomalies shall be calculated. NaNs are dropped.
    method : str
        'harmonic'             : climatology from calc_clim_harmonic with n harmonics
        'mean'                 : climatology from calc_clim_harmonic with n=0
        'moving_average' / 'ma': climatology from calc_clim_moving_average
    output : str
        'anomaly'     : return Ser minus the climatology (NaNs of Ser dropped)
        'climSer'     : return the climatology value of each time step, reindexed to Ser.index
        'climatology' : return the 365-value climatology itself. Only available with
                        longterm=True; with per-year climatologies there is no single
                        climatology, so 'climSer' is returned instead.
    longterm : boolean
        If true, one climatology is derived from all years together,
        otherwise a separate climatology is derived for each calendar year.
    window_size : int
        Moving average window size (used by 'moving_average' / 'ma' only)
    n : int
        Number of harmonics (used by 'harmonic' only)

    Returns
    -------
    pd.Series
        See "output". An empty input yields an empty Series; an unknown method is
        logged and yields an all-NaN Series on the NaN-free index of Ser.
    """

    # Per-year climatologies cannot be returned as one 365-value Series
    if output == 'climatology' and not longterm:
        output = 'climSer'

    xSer = Ser.dropna().copy()
    if len(xSer) == 0:
        return xSer

    # Work on a private copy so the index data is never modified in place
    doys = np.array(xSer.index.dayofyear)
    # in leap years, subtract 1 for all days after Feb 28
    doys[xSer.index.is_leap_year & (doys > 59)] -= 1

    # Explicit float dtype: an index-only Series is object-typed in recent pandas
    climSer = pd.Series(np.nan, index=xSer.index, dtype=float)

    if method not in ('harmonic', 'mean', 'moving_average', 'ma'):
        logging.error('Unknown method: %s', method)
        return climSer

    def _climatology_of(data):
        # Dispatch to the climatology routine selected by "method"
        if method == 'harmonic':
            return calc_clim_harmonic(data, n=n)
        if method == 'mean':
            return calc_clim_harmonic(data, n=0)
        return calc_clim_moving_average(data, window_size=window_size)

    if longterm:
        clim = _climatology_of(xSer)
        if output == 'climatology':
            return clim
        climSer[:] = clim.loc[doys].values
    else:
        years = xSer.index.year
        for yr in np.unique(years):
            in_year = np.asarray(years == yr)
            clim = _climatology_of(xSer[in_year])
            climSer[in_year] = clim.loc[doys[in_year]].values

    if output == 'climSer':
        return climSer.reindex(Ser.index)

    climSer.name = xSer.name
    return xSer - climSer
def calc_clim_harmonic(Ser, n=3, cutoff=False):
    """
    Calculates the mean seasonal cycle of a data set
    by fitting harmonics (ordinary least squares).

    Leap years are mapped onto a 365-day year: in leap years the day of year of
    all days after Feb 28 is reduced by one (Feb 29 therefore shares day 59).

    Parameters
    ----------
    Ser : pd.Series w. DatetimeIndex
        Timeseries of which the climatology shall be calculated. NaNs are dropped.
    n : int (optional)
        Number of harmonics that should be fitted.
        n=0 : long term mean
        n=1 : long term mean + annual cycle
        n=2 : long term mean + annual + half-annual cycle
        n=3 : long term mean + annual + half-annual + seasonal cycle
    cutoff : boolean
        If set, climatology values outside the 5th-95th percentile range of the
        original time series are set to NaN.

    Returns
    -------
    clim : pd.Series
        climatology of Ser (365 float values, index = day of year 1..365).
        All NaN if the fit cannot be solved (e.g., no data).
    """

    T = 365

    xSer = Ser.dropna()

    # Work on a private copy so the index data is never modified in place
    doys = np.array(xSer.index.dayofyear)
    # in leap years, subtract 1 for all days after Feb 28
    doys[xSer.index.is_leap_year & (doys > 59)] -= 1

    harmonics = np.arange(1, n + 1)

    def _design_matrix(days):
        # Columns: [1, cos(1..n), sin(1..n)] evaluated at the given days of year
        phase = 2 * np.pi * np.outer(days, harmonics) / T
        return np.hstack([np.ones((len(days), 1)), np.cos(phase), np.sin(phase)])

    A = _design_matrix(doys)

    try:
        y = np.asarray(xSer.values, dtype=float)
        # Solve the normal equations (A'A) x = A'y; a singular system raises LinAlgError
        x = np.linalg.solve(A.T.dot(A), A.T.dot(y))
    except (np.linalg.LinAlgError, TypeError, ValueError):
        x = np.full(2 * n + 1, np.nan)

    clim_doys = np.arange(1, T + 1)
    # Explicit float dtype: an index-only Series is object-typed in recent pandas
    clim = pd.Series(_design_matrix(clim_doys).dot(x), index=clim_doys, dtype=float)

    if cutoff and clim.notna().any():
        p = np.nanpercentile(xSer.values, [5, 95])
        clim[(clim < p[0]) | (clim > p[1])] = np.nan

    return clim
def calc_clim_moving_average(Ser, window_size=35, n_min=5, return_n=False):
    """
    Calculates the mean seasonal cycle as long-term mean within a moving average window.

    For each day of year, all data points (of all years) whose day of year lies within
    +/- floor(window_size / 2) days are averaged. The window wraps around the turn of
    the year. In leap years the day of year of all days after Feb 28 is reduced by
    one, so that every year has 365 days.

    Parameters
    ----------
    Ser : pd.Series w. DatetimeIndex
        Timeseries of which the climatology shall be calculated. NaNs are dropped.
    window_size : int
        Moving Average window size
    n_min : int
        Minimum number of data points to calculate average
    return_n : boolean
        If true, the number of data points over which is averaged is returned

    Returns
    -------
    clim : pd.Series
        climatology of Ser (without leap days); NaN where fewer than n_min
        data points are available
    n_days : pd.Series
        the number of data points available within each window
        (only returned if return_n is set)
    """

    xSer = Ser.dropna()
    values = xSer.values

    # Work on a private copy so the index data is never modified in place
    doys = np.array(xSer.index.dayofyear)
    # in leap years, subtract 1 for all days after Feb 28
    doys[xSer.index.is_leap_year & (doys > 59)] -= 1

    half_width = np.floor(window_size / 2.)

    clim_doys = np.arange(365) + 1
    # Explicit float dtype: an index-only Series is object-typed in recent pandas
    clim = pd.Series(np.nan, index=clim_doys, dtype=float)
    n_data = pd.Series(np.nan, index=clim_doys, dtype=float)

    for doy in clim_doys:
        # Circular day distance on a 365-day year avoids artifacts at start/end
        # of the year and treats odd and even window sizes alike
        dist = np.abs(doys - doy)
        dist = np.minimum(dist, 365 - dist)
        in_window = dist <= half_width

        n_in_window = np.count_nonzero(in_window)
        n_data[doy] = n_in_window
        if n_in_window >= n_min:
            clim[doy] = values[in_window].mean()

    if return_n:
        return clim, n_data
    return clim
def calc_clim_p(ts, mode='pentadal', n=3):
    """
    Calculates a pentadal (73 values per year) climatology.

    Parameters
    ----------
    ts : pd.Series w. DatetimeIndex
        Timeseries of which the climatology shall be calculated.
    mode : str
        'pentadal' : mean per pentad as returned by calc_pentadal_mean_std
        otherwise  : daily climatology from calc_clim_harmonic, averaged per pentad
    n : int
        Number of harmonics (only used if mode is not 'pentadal')

    Returns
    -------
    clim : pd.Series
        climatology of ts, index = pentad 1..73
    """

    if mode == 'pentadal':
        # calc_pentadal_mean_std returns (mean, std); only the mean is needed here
        clim = calc_pentadal_mean_std(ts)[0]
    else:
        clim = calc_clim_harmonic(ts, n=n)
        # Average the 365 daily values within each 5-day bin (0-based bin number)
        pentads = np.floor((clim.index.values - 1) / 5.)
        clim = clim.groupby(pentads).mean()
        clim.index = np.arange(73) + 1

    return clim
def calc_pentadal_mean_std(Ser, n_min=9, return_n=False):
    """
    Calculates the mean seasonal cycle and its standard deviation as long-term
    mean / std within a 45 days (9 pentads) moving window for each pentad
    (Faster than "calc_clim_moving_average" because output only per pentad)

    The window wraps around the turn of the year. In leap years the day of year of
    all days after Feb 28 is reduced by one, so that every year has 73 pentads.

    Time series are returned per pentad as needed for creating LDASSa scaling files.
    They can be mapped to 365 daily values by indexing the result with
    floor((doy - 1) / 5) + 1 for doy = 1..365.

    Parameters
    ----------
    Ser : pd.Series w. DatetimeIndex
        Timeseries of which the climatology shall be calculated. NaNs are dropped.
    n_min : int
        Minimum number of data points to calculate average
    return_n : boolean
        If true, the number of data points over which is averaged is returned

    Returns
    -------
    clim_mean : pd.Series
        mean of all data points within each window (index = pentad 1..73);
        NaN where fewer than n_min data points are available
    clim_std : pd.Series
        standard deviation (numpy default, ddof=0) of the same data points
    n_data : pd.Series
        the number of data points available within each window
        (only returned if return_n is set)
    """

    xSer = Ser.dropna()
    values = xSer.values

    # Work on a private copy so the index data is never modified in place
    doys = np.array(xSer.index.dayofyear)
    # in leap years, subtract 1 for all days after Feb 28
    doys[xSer.index.is_leap_year & (doys > 59)] -= 1

    # Pentad (1..73) of every data point
    Ser_pentad = np.floor((doys - 1) / 5.) + 1

    pentads = np.arange(73) + 1
    # Explicit float dtype: an index-only Series is object-typed in recent pandas
    clim_mean = pd.Series(np.nan, index=pentads, dtype=float)
    clim_std = pd.Series(np.nan, index=pentads, dtype=float)
    n_data = pd.Series(np.nan, index=pentads, dtype=float)

    for p in pentads:
        # Circular pentad distance on a 73-pentad year handles the year wraparound
        dist = np.abs(Ser_pentad - p)
        dist = np.minimum(dist, 73 - dist)
        in_window = dist <= 4

        n_in_window = np.count_nonzero(in_window)
        n_data[p] = n_in_window
        if n_in_window >= n_min:
            window_values = values[in_window]
            clim_mean[p] = window_values.mean()
            clim_std[p] = window_values.std()

    if return_n:
        return clim_mean, clim_std, n_data
    return clim_mean, clim_std