def add_chemistry(self):
    """Interpolate CSET chemistry data (O3, CO) onto the flight timeline.

    Requires ``self.flight_data`` to already be populated (see the raised
    AttributeError).  For each species, locates the matching per-flight
    netCDF file; if none exists the variable is filled with NaN so the
    dataset keeps a consistent set of variables.
    """
    if not hasattr(self, 'flight_data'):
        raise AttributeError("NO YOU DUMMY, ADD FLIGHT DATA FIRST")
    # Per-species netCDF attributes to attach to the interpolated variables.
    chem_dict = {"O3": {"long_name": "Fast Ozone mixing ratio", "units": 'ppbv'},
                 "CO": {"long_name": "VUX Carbon Monoxide mixing ratio", "units": 'ppbv'}}
    for chem, attrs in chem_dict.items():
        file_glob = os.path.join(CSET_Flight_Piece.chem_data_location,
                                 "cset-{}_GV_*{}.nc".format(chem, self.flight_name.upper()))
        try:
            # Take the first glob match; assumes at most one file per species/flight.
            chemfile = glob.glob(file_glob)[0]
        except IndexError as e:
            # No chemistry file for this species: fill with NaN (same length
            # as the flight track) so downstream code can rely on the variable.
            print(file_glob)
            self.flight_data[chem] = (('time'), np.full_like(self.flight_data.GGLAT.values, np.nan))
            self.flight_data[chem] = self.flight_data[chem].assign_attrs(attrs)
            continue
        # Species code and date are parsed from fixed positions in the file
        # name, e.g. ...cset-O3_GV_...YYYYMMDD....nc — NOTE(review): these
        # negative offsets silently break if the naming convention changes.
        dtype = chemfile[-22:-20]
        date = dt.datetime.strptime(chemfile[-16:-8], "%Y%m%d")
        assert dtype == chem
        try:
            # Sanity check: file date must match the first flight-data timestamp.
            assert date.date() == utils.as_datetime(self.flight_data.time.values[0]).date()
        except AssertionError as e:
            print(date.date())
            print(utils.as_datetime(self.flight_data.time.values[0]).date())
            raise e
        with xr.open_dataset(chemfile) as data:
            # Chemistry timestamps are seconds-since-file-date (Start_UTC).
            chem_time = np.array([date + dt.timedelta(seconds=i) for i in data['Start_UTC'].values])
            chem_data = data[dtype]
            # Interpolate onto flight times; out-of-range points become NaN
            # rather than raising (bounds_error=False).
            data_interp = utils.date_interp(self.flight_data.time.values, chem_time, chem_data, bounds_error=False)
            self.flight_data[chem] = (('time'), data_interp)
            self.flight_data[chem] = self.flight_data[chem].assign_attrs(attrs)
def add_MODISPBL_to_trajectory(ds, box_degrees=3):
    """Attach MODIS cloud-top-height statistics to a trajectory dataset.

    For each trajectory point whose hour matches the MODIS day (23 UTC) or
    night (11 UTC) overpass, samples the corresponding 1x1-degree CTH file in
    a box around the point; all other points get NaN.  Adds MODIS_CTH,
    MODIS_CTH_std and MODIS_CTH_nanfrac along 'time' and returns ds.
    """
    lats, lons, times = ds.lat.values, ds.lon.values, ds.time.values
    stamps = utils.as_datetime(times)
    day_idx = np.argwhere([t.hour == 23 for t in stamps]).squeeze()
    night_idx = np.argwhere([t.hour == 11 for t in stamps]).squeeze()
    dayfile = '/home/disk/eos4/jkcm/Data/CSET/Ryan/Daily_1x1_JHISTO_CTH_c6_day_v2_calboxes_top10_Interp_hif_zb_2011-2016_corrected.nc'
    nightfile = '/home/disk/eos4/jkcm/Data/CSET/Ryan/Daily_1x1_JHISTO_CTH_c6_night_v2_calboxes_top10_Interp_hif_zb_2011-2016.nc'
    means, sdevs, missing = [], [], []
    half = box_degrees / 2
    for i in range(len(times)):
        if i in day_idx:
            source = dayfile
        elif i in night_idx:
            source = nightfile
        else:
            # Not an overpass time: pad all three outputs with NaN.
            means.append(np.nan)
            sdevs.append(np.nan)
            missing.append(np.nan)
            continue
        with xr.open_dataset(source) as data:
            lat, lon, when = lats[i], lons[i], utils.as_datetime(times[i])
            # Locate the record matching this day-of-year and year.
            match = np.logical_and(data['days'].values == when.timetuple().tm_yday,
                                   data['years'].values == when.year)
            t_idx = np.argwhere(match)[0][0]
            # Latitude slice runs high→low because the grid is stored descending.
            box = data['cth'].sel(longitude=slice(lon - half, lon + half),
                                  latitude=slice(lat + half, lat - half))
            sample = box.isel(time=t_idx).values
            means.append(np.nanmean(sample))
            sdevs.append(np.nanstd(sample))
            missing.append(np.sum(np.isnan(sample)) / sample.size)
    ds['MODIS_CTH'] = (('time'), np.array(means))
    ds['MODIS_CTH_std'] = (('time'), np.array(sdevs))
    ds['MODIS_CTH_nanfrac'] = (('time'), np.array(missing))
    return ds
def add_MERRA_to_trajectory(ds, box_degrees=2):
    """Sample MERRA-2 3-D aerosol fields in a box around each trajectory point.

    Adds one 'MERRA2_<var>' variable per data variable, dimensioned
    (time, level), using a Gaussian-weighted horizontal average.
    """
    lats, lons, times = ds.lat.values, ds.lon.values, utils.as_datetime(ds.time.values)
    unique_days = set([utils.as_datetime(i).date() for i in times])
    files = [os.path.join(utils.MERRA_source, "svc_MERRA2_400.inst3_3d_aer_Nv.{:%Y%m%d}.nc4".format(i))
             for i in unique_days]
    with xr.open_mfdataset(sorted(files)) as data:
        # NOTE(review): coordinate copied as 'lev' here, but variables below are
        # declared on dim 'level' — naming looks inconsistent; confirm intent.
        ds.coords['lev'] = data.coords['lev']
        for var in data.data_vars.keys():
            vals = []
            for (lat, lon, time) in zip(lats, lons, times):
                # MERRA files carry naive timestamps; drop tzinfo to match.
                time = time.replace(tzinfo=None)
                x = data[var].sel(lon=slice(lon - box_degrees/2, lon + box_degrees/2),
                                  lat=slice(lat - box_degrees/2, lat + box_degrees/2))
                # Nearest 3-hourly analysis within ±119 minutes.
                y = x.sel(method='nearest', tolerance=dt.timedelta(minutes=119), time=time)
                # NOTE(review): `pres` is not defined anywhere in this function —
                # this line raises NameError as written; also the dataset's
                # vertical dim is 'lev' above, not 'level'. Needs fixing.
                z = y.sel(method='nearest', tolerance=50, level=pres)
                # 2D Gaussian the width of z, i.e. sigma=box_degrees.
                gauss = utils.gauss2D(shape=z.shape[1:], sigma=z.shape[1])
                filtered = z.values * gauss
                vals.append(np.sum(filtered, axis=(1,2)))
            ds['MERRA2_'+var] = (('time', 'level'), np.array(vals))
            ds['MERRA2_'+var] = ds['MERRA2_'+var].assign_attrs(data[var].attrs)
    return ds
def __init__(self, flight_name):
    """Initialize a flight from its name, loading in-situ and GOES data.

    Raises IOError when the name does not match the expected pattern.
    """
    canonical = flight_name.upper()
    parsed = re.match(CSET_Flight.name_re, canonical)
    if parsed is None:
        raise IOError('cannot recognise flight name')
    self.flight_number = int(parsed.group(1))
    self.name = canonical
    self.flight_name = canonical
    self.files = {}
    # Load data first: start/end times are derived from flight_data below.
    self.add_insitu_data(start_time=None, end_time=None)
    self.add_GOES_data()
    self.start_time = utils.as_datetime(self.flight_data.time.values[0])
    self.end_time = utils.as_datetime(self.flight_data.time.values[-1])
def add_ERA_sfc_data(ds, box_degrees=2):
    """Attach ERA5 surface variables to a trajectory dataset.

    For each trajectory point, Gaussian-average every surface variable over a
    box_degrees-wide box and add it to ds as 'ERA_<var>' along 'time'.
    Points outside the data domain get NaN.  Returns ds.
    """
    lats, lons, times = ds.lat.values, ds.lon.values, ds.time.values
    unique_days = set([utils.as_datetime(i).date() for i in times])
    sfc_files = [os.path.join(utils.ERA_source, "ERA5.sfc.NEP.{:%Y-%m-%d}.nc".format(i))
                 for i in unique_days]
    with xr.open_mfdataset(sorted(sfc_files)) as data:
        for var in data.data_vars.keys():
            vals = []
            # ERA longitudes are 0-360, trajectory longitudes may be negative.
            for (lat, lon, time) in zip(lats, lons % 360, times):
                if lat > np.max(data.coords['latitude']) or lat < np.min(data.coords['latitude']) or \
                   lon > np.max(data.coords['longitude']) or lon < np.min(data.coords['longitude']):
                    print('out of range of data')
                    print(lat, lon, time)
                    vals.append(float('nan'))
                    continue
                # Latitude slice runs high→low: ERA stores latitude descending.
                x = data[var].sel(longitude=slice(lon - box_degrees/2, lon + box_degrees/2),
                                  latitude=slice(lat + box_degrees/2, lat - box_degrees/2))
                # BUG FIX: np.timedelta64 takes (value, unit) positionally;
                # the original np.timedelta64(minutes=59) raises TypeError.
                z = x.sel(method='nearest', tolerance=np.timedelta64(59, 'm'), time=time)
                # Gaussian-weighted mean over the box (sigma = box width).
                gauss = utils.gauss2D(shape=z.shape, sigma=z.shape[0])
                filtered = z.values * gauss
                vals.append(np.sum(filtered))
            ds['ERA_'+var] = (('time'), np.array(vals))
            ds['ERA_'+var] = ds['ERA_'+var].assign_attrs(data[var].attrs)
    return ds
def plot(self, save=False, ax=None):
    """Plot the outbound/return flight track plus trajectories on a basemap.

    Parameters
    ----------
    save : bool
        When True, save the figure under utils.plot_dir/case_plots/.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(8.5, 5.5))
    else:
        # BUG FIX: the original only bound `fig` when ax was None, so
        # plot(save=True, ax=existing_ax) raised NameError at savefig.
        fig = ax.figure
    llr = {'lat': (20, 45), 'lon': (-160, -120)}
    m = utils.bmap(ax=ax, llr=llr)
    ax.text(0, 0.9, self.name, horizontalalignment='left',
            transform=ax.transAxes, fontdict={'size': 20})
    m.plot(self.ALC.outbound_flight_data['GGLON'], self.ALC.outbound_flight_data['GGLAT'],
           latlon=True, lw=10, label='outbound')
    m.plot(self.ALC.return_flight_data['GGLON'], self.ALC.return_flight_data['GGLAT'],
           latlon=True, lw=10, label='return')
    waypoints = utils.get_waypoint_data(self.TLC.outbound_flight, waypoint_type='b')
    for f in self.TLC.trajectory_files:
        with xr.open_dataset(f) as data:
            # Trajectory number is encoded in the last 3 chars of the stem;
            # truncate each trajectory at its return-waypoint time.
            end = waypoints.loc[float(f[-6:-3])].ret_time.to_pydatetime()
            idx = utils.as_datetime(data.time.values) < utils.as_datetime(end)
            m.plot(data.lon[idx], data.lat[idx], latlon=True, lw=5, ls='--',
                   label='traj ' + f[-6:-3])
    ax.legend(loc='lower right', ncol=2)
    if save:
        fig.savefig(os.path.join(utils.plot_dir, 'case_plots', 'map_{}.png'.format(self.name)),
                    dpi=300, bbox_inches='tight')
def create(rec):
    """Build an Article from a raw record dict.

    Missing optional fields default to empty values; 'is_updated' is True
    only when the record was updated today and differs from its creation time.
    """
    # Renamed from `id`: don't shadow the builtin.
    article_id = rec['id']
    title = rec['title']
    a = Article(article_id, title)
    a.content = rec.get('content', '')
    a.url = rec.get('url', '')
    # BUG FIX: dict.has_key() was removed in Python 3; use the `in` operator.
    a.link = json.loads(rec.get('link')) if 'link' in rec else []
    a.updated_at = utils.as_datetime(rec.get('updated_at', 0))
    a.created_at = utils.as_datetime(rec.get('created_at', 0))
    a.is_updated = (a.updated_at != a.created_at
                    and datetime.today().date() == a.updated_at.date())
    a.keyword = rec.get('keyword', '').split(',')

    def parse_link(s):
        # linkStr lines are "<text>\t<url>" pairs.
        secs = s.split('\t')
        return {'text': secs[0], 'url': secs[1]}

    if not a.content:
        a._fix_content()
    # BUG FIX: map() is lazy in Python 3; materialize as a list (matching the
    # Python 2 list semantics the original relied on) and drop empty lines.
    a.link_str = [parse_link(n) for n in rec.get('linkStr', '').split('\n') if n]
    return a
def add_ERA_data(self):
    """Retrieve ERA5 data in a box around a trajectory.

    Assumes ERA5 data is 0.3x0.3 degrees.  Samples pressure-level, surface,
    and 4D-Var flux files at 15-minute points along the flight track, adds
    derived q, horizontal gradients, LTS and EIS, and stores the result as
    an xarray Dataset on ``self.ERA_data``.
    """
    # Build a 15-minute time grid spanning the flight, snapped to whole hours.
    start = utils.as_datetime(self.flight_data.time.values[0]).replace(minute=0, second=0)
    end = utils.as_datetime(self.flight_data.time.values[-1]).replace(minute=0, second=0)+dt.timedelta(hours=1)
    dates = np.array([start + dt.timedelta(minutes=i*15)
                      for i in range(1+int((end-start).total_seconds()/(60*15)))])
    # Nearest flight-data sample for each grid time → track positions.
    index = [np.argmin(abs(utils.as_datetime(self.flight_data.time.values) - i)) for i in dates]
    lats = self.flight_data.GGLAT.values[index]
    lons = self.flight_data.GGLON.values[index]
    times = [np.datetime64(i.replace(tzinfo=None)) for i in dates]
    box_degrees = 2
    # go up/down/left/right this many pixels — NOTE(review): unused below.
    space_index = int(np.round(box_degrees/0.3/2))
    unique_days = set([utils.as_datetime(i).date() for i in times])
    files = [os.path.join(utils.ERA_source, "ERA5.pres.NEP.{:%Y-%m-%d}.nc".format(i))
             for i in unique_days]
    sfc_files = [os.path.join(utils.ERA_source, "ERA5.sfc.NEP.{:%Y-%m-%d}.nc".format(i))
                 for i in unique_days]
    flux_files = [os.path.join(utils.ERA_source, "4dvar_sfc_proc", "ERA5.4Dvarflux.NEP.{:%Y-%m-%d}.nc".format(i))
                  for i in unique_days]
    self.files['ERA_files'] = files + sfc_files
    with xr.open_mfdataset(sorted(files)) as data:
        ds = xr.Dataset(coords={'time': (('time'), times, data.coords['time'].attrs),
                                'level': (('level'), data.coords['level'])})
        # Derive specific humidity q from T, RH and pressure (level is hPa,
        # hence the *100 to Pa before the conversion).
        T = data['t'].values
        RH = data['r'].values
        p = np.broadcast_to(data.coords['level'].values[None, :, None, None], T.shape)*100
        q = utils.qv_from_p_T_RH(p, T, RH)
        data['q'] = (('time', 'level', 'latitude', 'longitude'), q)
        data['q'] = data['q'].assign_attrs({'units': "kg kg**-1", 'long_name': "specific_humidity",
                                            'dependencies': 'ERA_t, ERA_p, ERA_r'})
        # adding gradients in for z, t, and q. Assuming constant grid spacing.
        for var in ['t', 'q', 'z', 'u', 'v']:
            # np.gradient over (time, level, lat, lon): keep only the
            # lat (j) and lon (i) components.
            [_,_,dvardj, dvardi] = np.gradient(data[var].values)
            dlatdy = 360/4.000786e7  # degrees lat per meter y
            def get_dlondx(lat) :
                # degrees lon per meter x, shrinking with cos(lat).
                return(360/(np.cos(np.deg2rad(lat))*4.0075017e7))
            lat_spaces = np.diff(data.coords['latitude'].values)
            lon_spaces = np.diff(data.coords['longitude'].values)
            # Guard the constant-spacing assumption (lat descending at -0.3).
            assert(np.allclose(lat_spaces, -0.3, atol=0.01) and np.allclose(lon_spaces, 0.3, atol=0.05))
            dlondi = np.mean(lon_spaces)
            dlatdj = np.mean(lat_spaces)
            dlondx = get_dlondx(data.coords['latitude'].values)
            # Chain rule: d/di → d/dx via grid spacing and meters-per-degree.
            dvardx = dvardi/dlondi*dlondx[None,None,:,None]
            dvardy = dvardj/dlatdj*dlatdy
            data['d{}dx'.format(var)] = (('time', 'level', 'latitude', 'longitude'), dvardx)
            data['d{}dy'.format(var)] = (('time', 'level', 'latitude', 'longitude'), dvardy)
        # Attribute templates for the gradient fields; long_name is filled
        # with 'eastward'/'northward' below.
        grad_attrs = {'q': {'units': "kg kg**-1 m**-1",
                            'long_name': "{}_gradient_of_specific_humidity",
                            'dependencies': "ERA_t, ERA_p, ERA_r"},
                      't': {'units': "K m**-1",
                            'long_name': "{}_gradient_of_temperature",
                            'dependencies': "ERA_t"},
                      'z': {'units': "m**2 s**-2 m**-1",
                            'long_name': "{}_gradient_of_geopotential",
                            'dependencies': "ERA_z"},
                      'u': {'units': "m s**-1 m**-1",
                            'long_name': "{}_gradient_of_zonal_wind",
                            'dependencies': "ERA_u"},
                      'v': {'units': "m s**-1 m**-1",
                            'long_name': "{}_gradient_of_meridional_wind",
                            'dependencies': "ERA_v"}}
        for key, val in grad_attrs.items():
            for (n, drn) in [('x', 'eastward'), ('y', 'northward')]:
                attrs = val.copy()
                var = 'd{}d{}'.format(key, n)
                attrs['long_name'] = attrs['long_name'].format(drn)
                data[var] = data[var].assign_attrs(attrs)
        # Sample every pressure-level variable along the track.
        for var in data.data_vars.keys():
            vals = []
            # ERA longitudes are 0-360; trajectory longitudes may be negative.
            for (lat, lon, time) in zip(lats, lons%360, times):
                if lat > np.max(data.coords['latitude']) or lat < np.min(data.coords['latitude']) or \
                   lon > np.max(data.coords['longitude']) or lon < np.min(data.coords['longitude']):
                    print('out of range of data')
                    print(lat, lon, time)
                    vals.append(np.full_like(data.coords['level'], float('nan'), dtype='float'))
                    continue
                # Latitude slice runs high→low (grid stored descending).
                x = data[var].sel(longitude=slice(lon - box_degrees/2,
                                                  lon + box_degrees/2),
                                  latitude=slice(lat + box_degrees/2, lat - box_degrees/2))
                z = x.sel(method='nearest', time=time, tolerance=np.timedelta64(1, 'h'))
                # 2D Gaussian the width of z, i.e. sigma=box_degrees.
                # NOTE(review): sigma here is z.shape[0] (the level count),
                # unlike the box-width sigma used elsewhere — confirm intent.
                gauss = utils.gauss2D(shape=z.shape[1:], sigma=z.shape[0])
                filtered = z.values * gauss
                vals.append(np.sum(filtered, axis=(1,2)))
            ds['ERA_'+var] = (('time', 'level'), np.array(vals))
            ds['ERA_'+var] = ds['ERA_'+var].assign_attrs(data[var].attrs)
        # Lower tropospheric stability: theta(700 hPa) - T(1000 hPa).
        t_1000 = ds.ERA_t.sel(level=1000).values
        theta_700 = mu.theta_from_p_T(p=700, T=ds.ERA_t.sel(level=700).values)
        LTS = theta_700-t_1000
        ds['ERA_LTS'] = (('time'), np.array(LTS))
        ds['ERA_LTS'] = ds['ERA_LTS'].assign_attrs(
            {"long_name": "Lower tropospheric stability",
             "units": "K",
             "_FillValue": "NaN"})
        # Dewpoint approximated from the RH depression (≈ (100-RH)/5 K);
        # z is geopotential/9.81 → meters.
        t_dew = t_1000-(100-ds.ERA_r.sel(level=1000).values)/5
        lcl = mu.get_LCL(t=t_1000, t_dew=t_dew, z=ds.ERA_z.sel(level=1000).values/9.81)
        z_700 = ds.ERA_z.sel(level=700).values/9.81
        gamma_850 = mu.get_moist_adiabatic_lapse_rate(ds.ERA_t.sel(level=850).values, 850)
        # Estimated inversion strength (Wood & Bretherton style).
        eis = LTS - gamma_850*(z_700-lcl)
        ds['ERA_EIS'] = (('time'), np.array(eis))
        ds['ERA_EIS'] = ds['ERA_EIS'].assign_attrs(
            {"long_name": "Estimated inversion strength",
             "units": "K",
             "_FillValue": "NaN"})
    # Same box sampling for surface variables (scalar per point).
    with xr.open_mfdataset(sorted(sfc_files)) as sfc_data:
        for var in sfc_data.data_vars.keys():
            vals = []
            for (lat, lon, time) in zip(lats, lons%360, times):
                if lat > np.max(sfc_data.coords['latitude']) or lat < np.min(sfc_data.coords['latitude']) or \
                   lon > np.max(sfc_data.coords['longitude']) or lon < np.min(sfc_data.coords['longitude']):
                    print('out of range of data')
                    print(lat, lon, time)
                    vals.append(float('nan'))
                    continue
                x = sfc_data[var].sel(longitude=slice(lon - box_degrees/2, lon + box_degrees/2),
                                      latitude=slice(lat + box_degrees/2, lat - box_degrees/2))
                z = x.sel(method='nearest', time=time, tolerance=np.timedelta64(1, 'h'))
                gauss = utils.gauss2D(shape=z.shape, sigma=z.shape[0])
                filtered = z.values * gauss
                vals.append(np.sum(filtered))
            ds['ERA_'+var] = (('time'), np.array(vals))
            ds['ERA_'+var] = ds['ERA_'+var].assign_attrs(sfc_data[var].attrs)
    # Flux files: only sensible/latent heat fluxes are kept.
    with xr.open_mfdataset(sorted(flux_files)) as flux_data:
        for var in flux_data.data_vars.keys():
            if var not in ['sshf', 'slhf']:
                continue
            vals = []
            for (lat, lon, time) in zip(lats, lons%360, times):
                if lat > np.max(flux_data.coords['latitude']) or lat < np.min(flux_data.coords['latitude']) or \
                   lon > np.max(flux_data.coords['longitude']) or lon < np.min(flux_data.coords['longitude']):
                    print('out of range of data')
                    print(lat, lon, time)
                    vals.append(float('nan'))
                    continue
                x = flux_data[var].sel(longitude=slice(lon - box_degrees/2, lon + box_degrees/2),
                                       latitude=slice(lat + box_degrees/2, lat - box_degrees/2))
                z = x.sel(method='nearest', time=time, tolerance=np.timedelta64(1, 'h'))
                gauss = utils.gauss2D(shape=z.shape, sigma=z.shape[0])
                filtered = z.values * gauss
                vals.append(np.sum(filtered))
            ds['ERA_'+var] = (('time'), np.array(vals))
            ds['ERA_'+var] = ds['ERA_'+var].assign_attrs(flux_data[var].attrs)
    self.ERA_data = ds