# compile_daily_TS.py
# read hdfeos5 module
import h5py
# write averaged time series to csv
import csv
# use maths and dates, and glob for files list
import numpy as np
from datetime import datetime, timedelta
from mpl_toolkits.basemap import maskoceans
from glob import glob
_swathesfolder="/media/jesse/My Book/jwg366/Satellite/Aura/OMI/OMHCHOSubset/"
#_swathesfolder="data/"

def get_ocean_mask(lats, lons):
    '''
    Return a mask that is True over oceanic squares (and over NaN entries).
    '''
    # move NaN coordinates onto a known ocean square so maskoceans can handle them
    nans = np.isnan(lats)
    lat = np.copy(lats)
    lat[nans] = -50
    lon = np.copy(lons)
    lon[nans] = 100
    # get mask of ocean squares
    mask = maskoceans(lon, lat, lon, inlands=False).mask
    mask[nans] = True
    return mask
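
# A minimal usage sketch (assumes a 'swath' dict as returned by read_omi_swath below);
# this mirrors what the mask_ocean branch of read_omi_swath does:
#   mask = get_ocean_mask(swath['lats'], swath['lons'])
#   swath['HCHO'][mask] = np.NaN   # NaNify oceanic pixels, keeping land only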

def read_omi_swath(path, removerowanomaly=True, cloudy=0.4, screen=[-0.5e16, 1e17],
                   szamax=60, mask_ocean=False, mask_land=False):
    '''
    Read info from a single swath file, NaNifying entries whose main data
    quality flag is not zero.
    Filtering removes pixels with any of the following properties:
        cloud fraction > cloudy
        column density outside the screen range
        solar zenith angle > szamax
        main data quality flag not 0
        xtrack quality flag not 0 (row anomaly)
    Returns:
        dict with 'HCHO', 'HCHO_rsc', 'lats', 'lons', 'qualityflag', 'xqf', 'sza'
    '''
    # total column amounts are in molecules/cm2
    datafields = '/HDFEOS/SWATHS/OMI Total Column Amount HCHO/Data Fields/'
    geofields = '/HDFEOS/SWATHS/OMI Total Column Amount HCHO/Geolocation Fields/'
    # useful fields
    field_hcho     = datafields + 'ColumnAmount'
    field_hcho_rsc = datafields + 'ReferenceSectorCorrectedVerticalColumn'
    field_qf       = datafields + 'MainDataQualityFlag'
    field_clouds   = datafields + 'AMFCloudFraction'
    field_xqf      = geofields + 'XtrackQualityFlags'
    field_lon      = geofields + 'Longitude'
    field_lat      = geofields + 'Latitude'
    field_sza      = geofields + 'SolarZenithAngle'

    ## read in file:
    with h5py.File(path, 'r') as in_f:
        ## get data arrays (.value reads each dataset into a numpy array, h5py 2.x API)
        lats     = in_f[field_lat].value   # [1644, 60] = rows x sensors
        lons     = in_f[field_lon].value
        hcho     = in_f[field_hcho].value
        hcho_rsc = in_f[field_hcho_rsc].value
        qf       = in_f[field_qf].value
        xqf      = in_f[field_xqf].value
        cld      = in_f[field_clouds].value
        sza      = in_f[field_sza].value

    ## remove missing values and bad flags:
    # QF: missing<0, suss=1, bad=2
    suss = (qf != 0)
    hcho[suss] = np.NaN
    lats[suss] = np.NaN
    lons[suss] = np.NaN
    hcho_rsc[suss] = np.NaN

    # XQF: cross-track (row anomaly) flagged pixels
    if removerowanomaly:
        xsuss = (xqf != 0)
        hcho[xsuss] = np.NaN
        lats[xsuss] = np.NaN
        lons[xsuss] = np.NaN
        hcho_rsc[xsuss] = np.NaN

    # remove cloudy pixels
    rmcloud = cld > cloudy
    hcho[rmcloud] = np.NaN
    lats[rmcloud] = np.NaN
    lons[rmcloud] = np.NaN
    hcho_rsc[rmcloud] = np.NaN

    # remove values outside the screen range
    if screen is not None:
        rm = (hcho < screen[0]) + (hcho > screen[1])
        rmrsc = (hcho_rsc < screen[0]) + (hcho_rsc > screen[1])
        hcho[rm] = np.NaN
        lats[rm] = np.NaN
        lons[rm] = np.NaN
        hcho_rsc[rmrsc] = np.NaN

    # remove high solar zenith angles
    if szamax is not None:
        rm = (sza > szamax)
        hcho[rm] = np.NaN
        lats[rm] = np.NaN
        lons[rm] = np.NaN
        hcho_rsc[rm] = np.NaN

    # optionally keep only land or only ocean pixels
    if mask_ocean:
        mask = get_ocean_mask(lats, lons)
        hcho[mask] = np.NaN
        lats[mask] = np.NaN
        lons[mask] = np.NaN
        hcho_rsc[mask] = np.NaN
    if mask_land:
        mask = np.logical_not(get_ocean_mask(lats, lons))
        hcho[mask] = np.NaN
        lats[mask] = np.NaN
        lons[mask] = np.NaN
        hcho_rsc[mask] = np.NaN

    #return hcho, lats, lons, amf, amfg, w, apri, plevs
    return {'HCHO': hcho, 'lats': lats, 'lons': lons, 'HCHO_rsc': hcho_rsc,
            'qualityflag': qf, 'xqf': xqf, 'sza': sza}
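
# Example sketch of reading one subsetted granule; the filename is the sample
# listed in read_day_avg below, not necessarily one present on disk:
#   swath = read_omi_swath(_swathesfolder +
#       "OMI-Aura_L2-OMHCHO_2009m1230t0156-o29035_v003-2014m0626t164117.SUB.he5")
#   print(np.nanmean(swath['HCHO']))      # mean screened column, molecules/cm2
#   print(np.nanmean(swath['HCHO_rsc']))  # reference sector corrected mean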

def read_day_avg(day, subsets, mask_ocean=False, mask_land=False):
    '''
    Read the average HCHO from one day's worth of swaths (molecules/cm2).
    Returns both the ColumnAmount HCHO and the reference sector corrected amount,
    along with the pixel counts used for the averages.
    Pass in a list of subsets to pull the daily average out for each subset.
    '''
    YYYYmMMDD = day.strftime("%Ym%m%d")
    nsubs = len(subsets)
    hchos = np.zeros(nsubs)
    hcount = np.zeros(nsubs)
    hcho_rscs = np.zeros(nsubs)
    hrsccount = np.zeros(nsubs)

    # files look like this:
    # OMI-Aura_L2-OMHCHO_2009m1230t0156-o29035_v003-2014m0626t164117.SUB.he5
    pattern = "*OMHCHO_%st*" % YYYYmMMDD
    swaths = glob(_swathesfolder + pattern)

    # if there are no swaths for this day, return NaN averages and zero counts
    if len(swaths) == 0:
        print("Warning: %s missing" % YYYYmMMDD)
        nans = np.repeat(np.NaN, nsubs)
        return (nans, nans, np.repeat(0, nsubs))

    #print("reading %d swaths like %s" % (len(swaths), pattern))
    # for each swath, grab the entries within our lat/lon bounds
    for fpath in swaths:
        swath = read_omi_swath(fpath, mask_ocean=mask_ocean, mask_land=mask_land)
        # arrays are rows x sensors (I x 60)
        hcho = swath['HCHO']
        hcho_rsc = swath['HCHO_rsc']
        lats, lons = swath['lats'], swath['lons']
        # subset to each region of interest, ignoring NaN-comparison warnings
        for si, subset in enumerate(subsets):
            lllat, lllon, urlat, urlon = subset
            with np.errstate(invalid='ignore'):
                lonsub = (lons >= lllon) * (lons <= urlon)
                latsub = (lats >= lllat) * (lats <= urlat)
            sub = lonsub * latsub
            hchos[si] = hchos[si] + np.nansum(hcho[sub])
            hcount[si] = hcount[si] + np.sum(sub)
            hcho_rscs[si] = hcho_rscs[si] + np.nansum(hcho_rsc[sub])
            hrsccount[si] = hrsccount[si] + np.sum(sub)

    # turn the sums into averages; subsets with no valid pixels become NaN
    out_avg = np.zeros(nsubs)
    out_avg_corr = np.zeros(nsubs)
    out_counts = hrsccount
    for si in range(nsubs):
        if hcount[si] == 0:
            assert hrsccount[si] == 0, "Both counters should be zero if one is..."
            out_avg[si] = np.NaN
            out_avg_corr[si] = np.NaN
        else:
            out_avg[si] = hchos[si] / float(hcount[si])
            out_avg_corr[si] = hcho_rscs[si] / float(hrsccount[si])
    return (out_avg, out_avg_corr, out_counts)
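
# Example sketch: pull one day's average over the GEOS-Chem grid box listed in
# __main__ below; the averages come back NaN when no valid pixels are found:
#   avg, avg_rsc, count = read_day_avg(datetime(2005, 1, 1), [[-36, 147.5, -32, 152.5]])
#   print(avg[0], avg_rsc[0], count[0])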

def create_TS(subsets, outnames, mask_ocean=False, mask_land=False):
    '''
    Create the time series, averaging pixels over each 'subset' and writing one
    CSV file per name in 'outnames'.
    '''
    # dates where we have data:
    enddate = datetime(2016, 1, 1)
    startdate = datetime(2005, 1, 1)
    ndays = (enddate - startdate).days
    dates = [startdate + timedelta(days=d) for d in range(ndays)]

    # one list per subset
    n_subs = len(subsets)
    hcho = [[] for i in range(n_subs)]
    hcho_rsc = [[] for i in range(n_subs)]
    times = [[] for i in range(n_subs)]
    counts = [[] for i in range(n_subs)]

    # for every day, read the averages and count the entries
    st = datetime.now()
    for day in dates:
        try:
            h, hc, c = read_day_avg(day, subsets, mask_ocean=mask_ocean, mask_land=mask_land)
        except Exception as e:
            print("WARNING: day %s file is bad?" % day.strftime("%Y%m%d"))
            print("WARNING: Skipping this day, printing error message:")
            print(e)
            h, hc, c = (np.repeat(np.NaN, n_subs), np.repeat(np.NaN, n_subs), np.repeat(0, n_subs))
        # after 100 days, estimate how long the whole run will take
        if day == startdate + timedelta(days=100):
            check = (datetime.now() - st).total_seconds()
            print("~ %3.2f seconds per 100 days" % check)
            print("~ %4.2f minutes left..." % (check / 100. / 60 * ndays))
        ymd = day.strftime("%Y%m%d")
        # store the day's values in the lists for each subset
        for i in range(n_subs):
            hcho[i].append(h[i])
            hcho_rsc[i].append(hc[i])
            times[i].append(ymd)
            counts[i].append(c[i])

    # write one CSV per subset: date, HCHO, HCHO_rsc, pixel count
    for i, outname in enumerate(outnames):
        print('writing %s' % outname)
        print(times[i][0:n_subs], hcho[i][0:n_subs])  # quick peek at the first entries
        with open(outname, 'w') as outf:
            writer = csv.writer(outf, quoting=csv.QUOTE_NONE)
            writer.writerows(zip(times[i], hcho[i], hcho_rsc[i], counts[i]))

if __name__ == '__main__':
    # GEOS-Chem grid box:        -36, 147.5, -32, 152.5
    # Larger NSW region:         -38, 145, -30, 153
    # Sydney region:             -35.5, 150, -33.5, 151.5
    # Massive comparison region: -50, 110, -10, 160
    #subsets = [[-36, 147.5, -32, 152.5], [-38, 145, -30, 153], [-35.5, 150, -33.5, 151.5], [-50, 110, -10, 160]]
    #outnames = ['TS_GC.csv', 'TS_LargerNSW.csv', 'TS_Sydney.csv', 'TS_Aus.csv']
    #subset, outname = subsets[0], outnames[0]

    # second set of subsets for Kaitlyn Jan2017
    # 1) Wollongong region [34.15, 151, 35.5, 150]
    wg = [-35.5, 150, -34.15, 151]
    # 2) South Coast region (you did this one for me last time) [33.5, 151.5, 35.5, 150]
    sc = [-35.5, 150, -33.5, 151.5]
    # 3) Wollongong region with a) just land then b) just water
    # 4) South Coast region with a) just land then b) just water
    subsets = [wg, sc]
    names1 = ['TS_Wollongong.csv', 'TS_SouthCoast.csv']
    create_TS(subsets, names1, mask_ocean=False, mask_land=False)
    names2 = ['TS_Wollongong_NoOcean.csv', 'TS_SouthCoast_NoOcean.csv']
    create_TS(subsets, names2, mask_ocean=True, mask_land=False)
    names3 = ['TS_Wollongong_NoLand.csv', 'TS_SouthCoast_NoLand.csv']
    create_TS(subsets, names3, mask_ocean=False, mask_land=True)
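
# Reading the output back (sketch): each row of the CSVs written above holds the date
# (YYYYMMDD), the daily mean HCHO, the reference sector corrected mean, and the pixel
# count used for the average, e.g.
#   with open('TS_Wollongong.csv') as f:
#       rows = [r for r in csv.reader(f) if r]   # skip any blank lines
#       for ymd, hcho, hcho_rsc, count in rows:
#           ...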