/
climate.py
383 lines (338 loc) · 15.4 KB
/
climate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
"""Climate data and mass balance computations"""
# Built ins
import logging
import os
import datetime
import json
import warnings
# External libs
import numpy as np
import xarray as xr
import netCDF4
import pandas as pd
from scipy import stats
from scipy import optimize
# Optional libs
try:
import salem
except ImportError:
pass
# Locals
from oggm import cfg
from oggm import utils
from oggm.core import centerlines
from oggm import entity_task, global_task
from oggm.exceptions import (MassBalanceCalibrationError, InvalidParamsError,
InvalidWorkflowError)
# Module logger
log = logging.getLogger(__name__)
@entity_task(log, writes=['climate_historical'])
def process_custom_climate_data(gdir, y0=None, y1=None, output_filesuffix=None):
"""Processes and writes the climate data from a user-defined climate file.
The input file must have a specific format
(see https://github.com/OGGM/oggm-sample-data
->test-files/histalp_merged_hef.nc for an example).
This is the way OGGM used to do it for HISTALP before it got added to the shop.
Parameters
----------
gdir : :py:class:`oggm.GlacierDirectory`
the glacier directory to process
y0 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
y1 : int
the ending year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data). This year
will be included, i.e. 2019 means the data will end at 2019-12-01
output_filesuffix : str
this adds a suffix to the output file (useful to avoid overwriting
previous experiments)
"""
if not (('climate_file' in cfg.PATHS) and
os.path.exists(cfg.PATHS['climate_file'])):
raise InvalidParamsError('Custom climate file not found')
if cfg.PARAMS['baseline_climate'] not in ['', 'CUSTOM']:
raise InvalidParamsError("When using custom climate data please set "
"PARAMS['baseline_climate'] to an empty "
"string or `CUSTOM`. Note also that you can "
"now use the `process_histalp_data` task for "
"automated HISTALP data processing.")
# read the file
fpath = cfg.PATHS['climate_file']
nc_ts = salem.GeoNetcdf(fpath)
# Avoid reading all data
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
nc_ts.set_subset(((gdir.cenlon, gdir.cenlat),
(gdir.cenlon, gdir.cenlat)),
margin=2) # 2 is to be sure - also on small files
yrs = nc_ts.time.year
mths = nc_ts.time.month
y0 = yrs[0] if y0 is None else y0
if mths[0] != 1:
# The file starts at the wrong month
y0 += 1
y1 = yrs[-1] if y1 is None else y1
if mths[-1] != 12:
# Same: wrong month
y1 -= 1
nc_ts.set_period(t0=f'{y0}-01-01', t1=f'{y1}-12-01')
time = nc_ts.time
ny, r = divmod(len(time), 12)
if r != 0:
raise InvalidParamsError('Climate data should be full years')
# Units
assert nc_ts._nc.variables['hgt'].units.lower() in ['m', 'meters', 'meter',
'metres', 'metre']
assert nc_ts._nc.variables['temp'].units.lower() in ['degc', 'degrees',
'degree', 'c']
assert nc_ts._nc.variables['prcp'].units.lower() in ['kg m-2', 'l m-2',
'mm', 'millimeters',
'millimeter']
# geoloc
lon = nc_ts.get_vardata('lon')
lat = nc_ts.get_vardata('lat')
ilon = np.argmin(np.abs(lon - gdir.cenlon))
ilat = np.argmin(np.abs(lat - gdir.cenlat))
ref_pix_lon = lon[ilon]
ref_pix_lat = lat[ilat]
# read the data
temp = nc_ts.get_vardata('temp')
prcp = nc_ts.get_vardata('prcp')
hgt = nc_ts.get_vardata('hgt')
ttemp = temp[:, ilat-1:ilat+2, ilon-1:ilon+2]
itemp = ttemp[:, 1, 1]
thgt = hgt[ilat-1:ilat+2, ilon-1:ilon+2]
ihgt = thgt[1, 1]
thgt = thgt.flatten()
iprcp = prcp[:, ilat, ilon]
nc_ts.close()
gdir.write_monthly_climate_file(time, iprcp, itemp, ihgt,
ref_pix_lon, ref_pix_lat,
filesuffix=output_filesuffix,
source=fpath)
@entity_task(log)
def process_climate_data(gdir, y0=None, y1=None, output_filesuffix=None,
**kwargs):
"""Adds the selected climate data to this glacier directory.
Short wrapper deciding on which task to run based on
`cfg.PARAMS['baseline_climate']`.
If you want to make it explicit, simply call the relevant task
(e.g. oggm.shop.cru.process_cru_data).
Parameters
----------
gdir : :py:class:`oggm.GlacierDirectory`
the glacier directory to process
y0 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
y1 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
output_filesuffix : str
this add a suffix to the output file (useful to avoid overwriting
previous experiments)
**kwargs :
any other argument relevant to the task that will be called.
"""
# Which climate should we use?
baseline = cfg.PARAMS['baseline_climate']
if baseline == 'CRU':
from oggm.shop.cru import process_cru_data
process_cru_data(gdir, output_filesuffix=output_filesuffix,
y0=y0, y1=y1, **kwargs)
elif baseline == 'HISTALP':
from oggm.shop.histalp import process_histalp_data
process_histalp_data(gdir, output_filesuffix=output_filesuffix,
y0=y0, y1=y1, **kwargs)
elif baseline == 'W5E5':
from oggm.shop.w5e5 import process_w5e5_data
process_w5e5_data(gdir, output_filesuffix=output_filesuffix,
y0=y0, y1=y1, **kwargs)
elif baseline == 'GSWP3_W5E5':
from oggm.shop.w5e5 import process_gswp3_w5e5_data
process_gswp3_w5e5_data(gdir, output_filesuffix=output_filesuffix,
y0=y0, y1=y1, **kwargs)
elif baseline in ['ERA5', 'ERA5L', 'CERA', 'ERA5dr', 'ERA5L-HMA']:
from oggm.shop.ecmwf import process_ecmwf_data
process_ecmwf_data(gdir, output_filesuffix=output_filesuffix,
dataset=baseline, y0=y0, y1=y1, **kwargs)
elif '+' in baseline:
# This bit below assumes ECMWF only datasets, but it should be
# quite easy to extend for HISTALP+ERA5L for example
from oggm.shop.ecmwf import process_ecmwf_data
his, ref = baseline.split('+')
s = 'tmp_'
process_ecmwf_data(gdir, output_filesuffix=s+his, dataset=his,
y0=y0, y1=y1, **kwargs)
process_ecmwf_data(gdir, output_filesuffix=s+ref, dataset=ref,
y0=y0, y1=y1, **kwargs)
historical_delta_method(gdir,
ref_filesuffix=s+ref,
hist_filesuffix=s+his,
output_filesuffix=output_filesuffix)
elif '|' in baseline:
from oggm.shop.ecmwf import process_ecmwf_data
his, ref = baseline.split('|')
s = 'tmp_'
process_ecmwf_data(gdir, output_filesuffix=s+his, dataset=his,
y0=y0, y1=y1, **kwargs)
process_ecmwf_data(gdir, output_filesuffix=s+ref, dataset=ref,
y0=y0, y1=y1, **kwargs)
historical_delta_method(gdir,
ref_filesuffix=s+ref,
hist_filesuffix=s+his,
output_filesuffix=output_filesuffix,
replace_with_ref_data=False)
elif baseline == 'CUSTOM':
process_custom_climate_data(gdir, y0=y0, y1=y1,
output_filesuffix=output_filesuffix,
**kwargs)
else:
raise ValueError("cfg.PARAMS['baseline_climate'] not understood")
@entity_task(log, writes=['climate_historical'])
def historical_delta_method(gdir, ref_filesuffix='', hist_filesuffix='',
output_filesuffix='', ref_year_range=None,
delete_input_files=True, scale_stddev=True,
replace_with_ref_data=True):
"""Applies the anomaly method to historical climate data.
This function can be used to prolongate historical time series,
for example by bias-correcting CERA-20C to ERA5 or ERA5-Land.
The timeseries must be already available in the glacier directory
Parameters
----------
gdir : :py:class:`oggm.GlacierDirectory`
where to write the data
ref_filesuffix : str
the filesuffix of the historical climate data to take as reference
hist_filesuffix : str
the filesuffix of the historical climate data to apply to the
reference
output_filesuffix : str
the filesuffix of the output file (usually left empty - i.e. this
file will become the default)
ref_year_range : tuple of str
the year range for which you want to compute the anomalies. The
default is to take the entire reference data period, but you could
also choose `('1961', '1990')` for example
delete_input_files : bool
delete the input files after use - useful for operational runs
where you don't want to carry too many files
scale_stddev : bool
whether to scale the temperature standard deviation as well
(you probably want to do that)
replace_with_ref_data : bool
the default is to paste the bias-corrected data where no reference
data is available, i.e. creating timeseries which are not consistent
in time but "better" for recent times (e.g. CERA-20C until 1980,
then ERA5). Set this to False to prevent this and make a consistent
time series of CERA-20C (but bias corrected to the reference data,
so "better" than CERA-20C out of the box).
"""
if ref_year_range is not None:
raise NotImplementedError()
# Read input
f_ref = gdir.get_filepath('climate_historical', filesuffix=ref_filesuffix)
with xr.open_dataset(f_ref) as ds:
ref_temp = ds['temp']
ref_prcp = ds['prcp']
ref_hgt = float(ds.ref_hgt)
ref_lon = float(ds.ref_pix_lon)
ref_lat = float(ds.ref_pix_lat)
source = ds.attrs.get('climate_source')
f_his = gdir.get_filepath('climate_historical', filesuffix=hist_filesuffix)
with xr.open_dataset(f_his) as ds:
hist_temp = ds['temp']
hist_prcp = ds['prcp']
# To differentiate both cases
if replace_with_ref_data:
source = ds.attrs.get('climate_source') + '+' + source
else:
source = ds.attrs.get('climate_source') + '|' + source
# Common time period
cmn_time = (ref_temp + hist_temp)['time']
assert len(cmn_time) // 12 == len(cmn_time) / 12
# We need an even number of years for this to work
if ((len(cmn_time) // 12) % 2) == 1:
cmn_time = cmn_time.isel(time=slice(12, len(cmn_time)))
assert len(cmn_time) // 12 == len(cmn_time) / 12
assert ((len(cmn_time) // 12) % 2) == 0
cmn_time_range = cmn_time.values[[0, -1]]
# Select ref
sref_temp = ref_temp.sel(time=slice(*cmn_time_range))
sref_prcp = ref_prcp.sel(time=slice(*cmn_time_range))
# See if we need to scale the variability
if scale_stddev:
# This is a bit more arithmetic
tmp_sel = hist_temp.sel(time=slice(*cmn_time_range))
tmp_std = tmp_sel.groupby('time.month').std(dim='time')
std_fac = sref_temp.groupby('time.month').std(dim='time') / tmp_std
std_fac = np.tile(std_fac.data, len(hist_temp) // 12)
win_size = len(cmn_time) + 1
def roll_func(x, axis=None):
x = x[:, ::12]
n = len(x[0, :]) // 2
xm = np.nanmean(x, axis=axis)
return xm + (x[:, n] - xm) * std_fac
hist_temp = hist_temp.rolling(time=win_size, center=True,
min_periods=1).reduce(roll_func)
# compute monthly anomalies
# of temp
ts_tmp_sel = hist_temp.sel(time=slice(*cmn_time_range))
ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time')
ts_tmp = hist_temp.groupby('time.month') - ts_tmp_avg
# of precip -- scaled anomalies
ts_pre_avg = hist_prcp.sel(time=slice(*cmn_time_range))
ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
ts_pre_ano = hist_prcp.groupby('time.month') - ts_pre_avg
# scaled anomalies is the default. Standard anomalies above
# are used later for where ts_pre_avg == 0
ts_pre = hist_prcp.groupby('time.month') / ts_pre_avg
# reference averages
# for temp
loc_tmp = sref_temp.groupby('time.month').mean()
ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
# for prcp
loc_pre = sref_prcp.groupby('time.month').mean()
# scaled anomalies
ts_pre = ts_pre.groupby('time.month') * loc_pre
# standard anomalies
ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
# Correct infinite values with standard anomalies
ts_pre.values = np.where(np.isfinite(ts_pre.values),
ts_pre.values,
ts_pre_ano.values)
# The previous step might create negative values (unlikely). Clip them
ts_pre.values = utils.clip_min(ts_pre.values, 0)
assert np.all(np.isfinite(ts_pre.values))
assert np.all(np.isfinite(ts_tmp.values))
if not replace_with_ref_data:
# Just write what we have
gdir.write_monthly_climate_file(ts_tmp.time.values,
ts_pre.values, ts_tmp.values,
ref_hgt, ref_lon, ref_lat,
filesuffix=output_filesuffix,
source=source)
else:
# Select all hist data before the ref
ts_tmp = ts_tmp.sel(time=slice(ts_tmp.time[0], ref_temp.time[0]))
ts_tmp = ts_tmp.isel(time=slice(0, -1))
ts_pre = ts_pre.sel(time=slice(ts_tmp.time[0], ref_temp.time[0]))
ts_pre = ts_pre.isel(time=slice(0, -1))
# Concatenate and write
gdir.write_monthly_climate_file(np.append(ts_pre.time, ref_prcp.time),
np.append(ts_pre, ref_prcp),
np.append(ts_tmp, ref_temp),
ref_hgt, ref_lon, ref_lat,
filesuffix=output_filesuffix,
source=source)
if delete_input_files:
# Delete all files without suffix
if ref_filesuffix:
os.remove(f_ref)
if hist_filesuffix:
os.remove(f_his)