-
Notifications
You must be signed in to change notification settings - Fork 8
/
OBS.py
400 lines (300 loc) · 13.6 KB
/
OBS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# This module contains code for retrieving and visualizing various observation types.
# Currently it is pretty kludgy and contains only quick subroutines that I wrote
# to do a few things I needed to do.
# load the required packages
import numpy as np
import pandas as pd
import datetime
import experiment_settings as es
import DART as dart
import os.path
#from netCDF4 import Dataset
def HRRS_as_DF(OBS,TPbased=False,TPbased_vertical_res=50E-3,hostname='taurus',debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde (HRRS) data as a single pandas data frame.

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the DART/model
        specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause - default is False.
    TPbased_vertical_res: resolution (in km) of the grid to which we interpolate the obs when
        using TP-based coordinates. Default is 50m.
    hostname: default is taurus
    debug: set to True to print the path of every sounding that is read. Default is False.

    RETURNS:
    A data frame holding all soundings found, with added 'Date' and 'StationNumber'
    columns, altitude in km and temperature in Kelvin. If no sounding file matches
    the request, an empty data frame is returned.
    """
    import calendar

    # columns of the raw soundings that are dropped from the output
    useless_cols=['Time','Dewpt','RH','Ucmp','Vcmp','spd','dir',
                  'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']

    # station information and the data directory do not depend on the year,
    # so look them up once
    stationdata = HRRS_station_data(hostname)
    datadir = es.obs_data_paths('HRRS',hostname)

    DR=OBS['daterange']
    lat_min, lat_max = OBS['latrange'][0], OBS['latrange'][1]
    lon_min, lon_max = OBS['lonrange'][0], OBS['lonrange'][1]

    # list which will hold the data frames for each station and time
    DFlist=[]

    # because the HRRS data are sorted by years, loop over the years in the daterange
    for YYYY in range(DR[0].year, DR[len(DR)-1].year+1):

        # keep the stations available this year that lie inside the lat/lon box
        stations_latlon = []
        for s in HRRS_stations_available_per_year(YYYY):
            station_row = stationdata.loc[int(s)]
            inside_lat = lat_min <= station_row['Lat'] <= lat_max
            inside_lon = lon_min <= station_row['Lon'] <= lon_max
            if inside_lat and inside_lon:
                stations_latlon.append(s)

        # the subset of the requested daterange that fits into this year;
        # leap years have 366 days, otherwise 31 Dec would be silently dropped
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY,1,1,0,0,0), periods=days_in_year*4, DT='6H')
        # sorted so that the row order of the output is reproducible
        DR2 = sorted(set(year_daterange).intersection(DR))

        # loop over available stations and dates, and read whatever files exist
        for s in stations_latlon:
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir+'/'+str(YYYY)+'/'+str(s)+'/'+str(s)+'-'+datestr+'_mod.dat'
                if not os.path.exists(ff):
                    continue
                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff,vertical_res_km=TPbased_vertical_res,hostname=hostname)
                    alt_to_km = 1.0    # here the altitude is already in km
                    temp_to_K = 0.0    # ...and the temperature already in Kelvin
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3 # raw data are in m -- convert to km
                    temp_to_K = 273.15 # raw data need to be converted to kelvin

                # TP_based_HRRS_data returns None when no tropopause was found
                if D is None:
                    continue

                # add columns holding the date and the station number
                D['Date'] = pd.Series(dd, index=D.index)
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km and temp in Kelvin
                D['Alt']=D['Alt']*alt_to_km
                D['Temp']=D['Temp']+temp_to_K

                # the TP-based frames never contain the unneeded columns
                if not TPbased:
                    D.drop(useless_cols,inplace=True,axis=1)

                DFlist.append(D)

    # pd.concat raises on an empty list, so handle "nothing found" explicitly
    if not DFlist:
        return pd.DataFrame()

    # merge the list of data frames into a single DF
    return pd.concat(DFlist, axis=0)
def TP_based_HRRS_data(ff,vertical_res_km=50E-3,debug=False,hostname='taurus',mean_ztrop_file=None):
    """
    Given a single high-res radiosonde sounding (identified by its full file path, ff),
    load the data from the sounding and compute temperature, pressure and buoyancy
    frequency as a function of distance from the thermal tropopause.

    This is done by:
    1. reading in the data as a pandas data frame
    2. computing the height of the tropopause
    3. computing the altitude of each data point relative to the tropopause,
       shifted by the mean tropopause height of the station
    4. linearly interpolating temperature and pressure to an evenly-spaced vertical grid

    This procedure is based on Birner et al. 2002 (http://doi.wiley.com/10.1029/2002GL015142)
    Here the LR tropopause follows the WMO criterion. Quoting Birner et al. (2002):
    The thermal TP is defined as the lowest level where the temperature lapse rate falls
    below 2 K/km and its average between this level and all higher levels within 2 km
    remains below this value [WMO, 1957].

    INPUTS:
    ff: the full path to the HRRS profile that we will load. The station number is
        taken from the name of the directory that holds the file.
    vertical_res_km: vertical resolution of the grid to which we interpolate, in km.
        The default is 50m, which is roughly the vertical resolution of the HRRS obs.
    debug: set to True to print some diagnostics. Default is False.
    hostname: passed on to TIL.ztrop. Default is taurus.
    mean_ztrop_file: path to a csv file of mean tropopause heights per station (as
        written by HRRS_mean_ztrop_to_csv). The default (None) uses the Jan 2010
        file at its original location.

    RETURNS:
    A data frame with columns 'Press', 'Temp' (Kelvin), 'Alt' (km, TP-based), 'N2'
    and 'ztropp', or None if no lapse-rate tropopause was found for this sounding.
    """
    # load interpolate function from scipy
    from scipy.interpolate import interp1d
    from TIL import ztrop
    from TIL import Nsq

    # TODO: the default is a csv file of mean heights for Jan 2010 in a user- and
    # host-specific location; pass mean_ztrop_file to use anything else.
    if mean_ztrop_file is None:
        mean_ztrop_file = '/data/c1/lneef/HRRS//mean_tropopause_height_per_station_20100101-20100131.csv'

    # read in the data as a data frame
    DF0 = read_HRRS_data(ff)

    # drop unnecessary columns
    useless_cols=['Time','Dewpt','RH','Ucmp','Vcmp','spd','dir','Lat','Lon',
                  'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']
    DF0.drop(useless_cols,inplace=True,axis=1)

    # get rid of NaNs
    DF=DF0.dropna()

    if debug:
        print('Loading file '+ff)

    # compute the height of the lapse-rate tropopause from the altitude array
    z=DF['Alt']/1E3        # Altitude in km
    T=DF['Temp']+273.15    # Temp in Kelvin
    P=DF['Press']
    ztropp=ztrop(z=z,T=T,debug=debug,hostname=hostname)

    if ztropp is None:
        if debug:
            print('No clear lapse-rate tropopause found for the following sounding:')
            print(ff)
            print('Returning None')
        return None

    # extract the station number from the file path (name of the parent directory)
    station = ff.split('/')[-2]

    # retrieve the mean tropopause height for this station
    ZT=pd.read_csv(mean_ztrop_file,index_col=0)
    ztrop_mean = ZT.loc[int(station)].ztrop_mean

    # now compute the altitude relative to the tropopause, plus mean tropopause height
    zTP = z-ztropp+ztrop_mean

    # interpolate temp, pressure to this new coordinate
    fT = interp1d(zTP, T, kind='linear')
    fP = interp1d(zTP, P, kind='linear')

    # create a regularly spaced grid (in km)
    zTPgrid=np.arange(0.0,26.0, vertical_res_km)

    # select whatever part of the regular grid fits strictly inside the range sampled
    # by this sounding, so that interp1d is never asked to extrapolate
    select = np.where(np.logical_and(zTPgrid>zTP.min(), zTPgrid<zTP.max()))
    zTPnew=zTPgrid[select]

    # now compute the variables on this grid using the interpolate functions
    Tnew = fT(zTPnew)
    Pnew = fP(zTPnew)

    # N2 comes out quite noisy when computed from raw radiosonde observations,
    # so it is recomputed here from the profiles on the regular grid rather than
    # interpolated from the raw N2 column
    N2new = Nsq(Tnew,zTPnew,Pnew)

    # now create a new dataframe with the TP-based heights
    new_data={'Press':Pnew,'Temp':Tnew,'Alt':zTPnew,'N2':N2new,'ztropp':ztropp}
    return pd.DataFrame(data=new_data)
def HRRS_mean_ztrop_to_csv(DR,hostname='taurus',debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: a list of datetime objects that give the desired date range
    hostname: used to look up the HRRS data directory and passed on to TIL.ztrop.
        Default is taurus.
    debug: set to True to print the files read and the resulting means. Default is False.

    RETURNS:
    A pandas Series named 'ztrop_mean', indexed by station number, holding the mean
    tropopause height over all soundings found in DR. Stations without any usable
    sounding get NaN. The same Series is written to
    <HRRS data dir>/mean_tropopause_height_per_station_<first date>-<last date>.csv
    """
    import calendar
    from TIL import ztrop

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS',hostname)

    # tropopause heights of all available soundings, per station. This is collected
    # across all years so that a multi-year range is averaged as a whole.
    ztrop_per_station = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    for YYYY in range(DR[0].year, DR[len(DR)-1].year+1):

        # the subset of the requested daterange that fits into this year;
        # leap years have 366 days, otherwise 31 Dec would be silently dropped
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY,1,1,0,0,0), periods=days_in_year*4, DT='6H')
        DR2 = sorted(set(year_daterange).intersection(DR))

        # loop over the stations available that year and the dates, and read
        # whatever files exist
        for s in HRRS_stations_available_per_year(YYYY):
            ztrop_list = ztrop_per_station.setdefault(s, [])
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir+'/'+str(YYYY)+'/'+str(s)+'/'+str(s)+'-'+datestr+'_mod.dat'
                if not os.path.exists(ff):
                    continue
                if debug:
                    print(ff)

                # read in the station data
                D = read_HRRS_data(ff)

                # compute tropopause height
                z=D['Alt']/1E3        # Altitude in km
                T=D['Temp']+273.15    # Temp in Kelvin
                ztropp=ztrop(z=z,T=T,debug=debug,hostname=hostname)

                # ztrop returns None when no tropopause could be identified
                if ztropp is not None:
                    ztrop_list.append(ztropp)

    # average the tropopause heights; stations without data get NaN
    # (np.mean of an empty list would give the same value but emit a warning)
    ztrop_dict = dict()
    for s, ztrop_list in ztrop_per_station.items():
        ztrop_dict[s] = np.mean(ztrop_list) if ztrop_list else np.nan

    # turn dict into a pandas Series
    ZT=pd.Series(data=ztrop_dict, name='ztrop_mean')
    if debug:
        print(ZT)

    # write the Series to a csv file in the HRRS data directory
    datestr = DR[0].strftime("%Y%m%d")+'-'+DR[len(DR)-1].strftime("%Y%m%d")+'.csv'
    fname=datadir+'/'+'mean_tropopause_height_per_station_'+datestr
    print('storing file '+fname)
    ZT.to_csv(fname, index=True, sep=',',header=True)

    return(ZT)
def read_HRRS_data(ff):
    """
    Read in a .dat file from SPARC high-res radiosonde data and return it as a
    pandas data frame with an added buoyancy-frequency column 'N2'.

    Input ff is a string pointing to the full path of the desired file.

    The first 13 lines of the file are skipped, the next line supplies the column
    names, and the two lines after that (units and symbols) are discarded.
    The columns 'Press', 'Temp', 'Lat', 'Lon' and 'Alt' are converted to floats;
    known missing-value codes in 'Temp', 'Alt', 'Lat' and 'Lon' become NaN.
    'N2' is (g/theta)*dtheta/dz, with theta the potential temperature computed from
    'Temp' (Celsius) and 'Press', and z taken from 'Alt'.
    """
    # here is a dict that gives bad values for different columns
    # alert: this is still incomplete
    badvals = {'Temp':['999.0'],'Alt':['99.0','99999.0'],'Lat':['999.000'],'Lon':['9999.000']}

    # sep=r'\s+' is the whitespace-delimited mode (replaces delim_whitespace=True).
    # Malformed lines are skipped; the keyword for that was renamed in pandas 1.3
    # and the old one removed in pandas 2.0, so try the new spelling first.
    try:
        D = pd.read_csv(ff,skiprows=13,on_bad_lines='skip',sep=r'\s+',na_values=badvals)
    except TypeError:
        D = pd.read_csv(ff,skiprows=13,error_bad_lines=False,sep=r'\s+',na_values=badvals)

    # kick out the first two rows - they hold units and symbols
    D.drop(D.index[[0,1]], inplace=True)

    # also make sure that lat, lon, pressure, altitude, and temp are numerics
    vars_to_float = ['Press','Temp','Lat','Lon','Alt']
    D[vars_to_float] = D[vars_to_float].astype(float)

    # compute the vertical gradient of potential temp and, from that, buoyancy frequency
    P0=1000.0           # reference pressure (presumably hPa, like 'Press' -- TODO confirm)
    Rd = 286.9968933    # Gas constant for dry air J/degree/kg
    g = 9.80616         # Acceleration due to gravity m/s^2
    cp = 1005.0         # heat capacity at constant pressure m^2/s^2*K
    theta=(D['Temp']+273.15)*(P0/D['Press'])**(Rd/cp)  # note that this includes conversion of Celsius to Kelvin

    # dtheta/dz as the ratio of the centred increments of theta and altitude.
    # Do NOT pass the altitude increments as the second argument of np.gradient:
    # current numpy interprets an array there as point coordinates, not spacings.
    dtheta = np.gradient(theta.values)
    dZ = np.gradient(D['Alt'].values)
    dthetadZ = dtheta/dZ
    D["N2"]=(g/theta)*dthetadZ

    return(D)
def HRRS_stations_available_per_year(YYYY):
    """
    Return the list of high-res radiosonde station numbers (as strings) that are
    available for the calendar year YYYY (an integer).

    A year that has not been coded in raises a KeyError.

    TODO: so far only have 2010 coded in ...need to add others
    """
    # station numbers for 2010, kept as strings to preserve leading zeros
    stations_2010 = [
        '03160', '04102', '12850', '14607', '14918',
        '22536', '25624', '26510', '26616', '40308',
        '40504', '40710', '61705', '03190', '11641',
        '13985', '14684', '21504', '25501', '25713',
        '26615', '27502', '40309', '40505', '41406',
    ]

    # lookup table: year -> available stations
    available_by_year = {2010: stations_2010}

    return available_by_year[YYYY]
def HRRS_station_data(hostname='taurus'):
    """
    Read in information about the high-res radiosondes and return it as a pandas dataframe.

    The information is read from the file ListOfStations.dat in the HRRS data
    directory of the given host. The returned data frame is indexed by the 'WBAN'
    column; 'Lat' and 'Lon' are converted to numbers, with unparseable entries
    set to NaN.
    """
    datadir = es.obs_data_paths('HRRS',hostname)
    # os.path.join works whether or not datadir ends with a path separator
    ff = os.path.join(datadir,'ListOfStations.dat')

    colnames=[ 'WBAN','Station_Name','State','Country','WMO_Code','Lat','Lon','Height','Transition date']

    # the first line of the file is skipped and replaced by colnames. Malformed
    # lines are skipped; the keyword for that was renamed in pandas 1.3 and the
    # old one removed in pandas 2.0, so try the new spelling first.
    try:
        stations = pd.read_csv(ff,delimiter=",",on_bad_lines='skip',skiprows=1,names=colnames,index_col='WBAN')
    except TypeError:
        stations = pd.read_csv(ff,delimiter=",",error_bad_lines=False,skiprows=1,names=colnames,index_col='WBAN')

    # a few columns have to be coerced to numeric
    stations[['Lat','Lon']] = stations[['Lat','Lon']].apply(pd.to_numeric, errors='coerce')

    return(stations)
def read_COSMIC_TPbased_mean(E,hostname='taurus'):
    """
    Read in COSMIC temp and N2 data averaged with respect to the local tropopause.

    INPUTS:
    E: a dictionary whose entry 'variable' selects what to read: 'T' or 'Nsq'
       (the latter is stored in the file under the name 'N2').
    hostname: used to look up the COSMIC data directory. Default is taurus.

    RETURNS:
    A dictionary with the entries
        z: the 'z' variable of the file
        data: the requested variable; converted to Kelvin if its units attribute
              contains 'Celsius'
        units: the units of the data ('K' after a Celsius conversion)
    """
    from netCDF4 import Dataset

    # find the path to the data
    datadir = es.obs_data_paths('COSMIC',hostname)

    # TODO: right now this loads the only file I have. Later can code a dynamic way to
    # choose files based on what is specified in E.
    ff = 'mean_GPS-RO_45-60N_COSMIC_Jan2010_TPbased_mean.nc'
    # os.path.join works whether or not datadir ends with a path separator
    filename = os.path.join(datadir,ff)

    # translate the requested variable into its name in the file before opening
    # anything, so that an unknown variable cannot leave the file open
    varnames = {'T':'T',
                'Nsq':'N2'}
    variable = varnames[E['variable']]

    # open the file and read in the relevant data into dict;
    # the file is closed even if reading fails
    D = dict()
    f = Dataset(filename,'r')
    try:
        ncvar = f.variables[variable]
        D['z'] = f.variables['z'][:]
        D['data'] = ncvar[:]
        D['units'] = ncvar.units

        # one last thing! Convert Celsius to Kelvin (0 C = 273.15 K)
        if 'Celsius' in D['units']:
            D['data'] = D['data']+273.15
            D['units'] = 'K'
    finally:
        f.close()

    return D