#Assumed module-level imports for this section. Helper routines referenced below
# (date_seq, format_dates, get_pressure, get_lat_lon, get_lat_lon_rt52, get_lat_lon_sfc,
# get_lat_lon_inds, reform_terrain, get_dp, drop_duplicates) are defined elsewhere in
# this package.
import datetime as dt
import glob
import warnings

import numpy as np
import pandas as pd
import xarray as xr
import netCDF4 as nc
import metpy.calc as mpcalc
from metpy.units import units


def read_erai_fc(domain,times):
    #Open ERA-Interim forecast netcdf files and extract variables needed for a range of
    # times and a given spatial domain. Optionally, use a single time (including the hour).

    ref = dt.datetime(1900,1,1,0,0,0)
    if len(times) > 1:
        date_list = date_seq(times,"hours",6)
    else:
        date_list = times

    #If the last date in the list is the start of the next month, don't include it in the
    # date-stamp list used for file names
    if (date_list[-1].day==1) & (date_list[-1].hour==0):
        formatted_dates = [format_dates(x) for x in date_list[0:-1]]
    else:
        formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    #Get time-invariant spatial info
    lon,lat = get_lat_lon()
    lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
    lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
    lon = lon[lon_ind]
    lat = lat[lat_ind]
    terrain = reform_terrain(lon,lat)

    #Initialise arrays for each variable
    wg10 = np.empty((0,len(lat_ind),len(lon_ind)))
    cape = np.empty((0,len(lat_ind),len(lon_ind)))

    for date in unique_dates:
        #Load ERA-Interim forecast files
        wg10_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/wg10/"\
                +"wg10_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
        cape_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cape/"\
                +"cape_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])

        #Get times to load in from the file
        times = wg10_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]

        #Load data
        wg10 = np.append(wg10,wg10_file["wg10"][time_ind,lat_ind,lon_ind],axis=0)
        cape = np.append(cape,cape_file["cape"][time_ind,lat_ind,lon_ind],axis=0)

        wg10_file.close(); cape_file.close()

    return [wg10,cape,lon,lat,date_list]
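#The time matching in read_erai_fc (and in the readers below) converts each query datetime
# to hours since 1900-01-01 (the ERA-Interim netcdf time units) and keeps the positions of
# the file's time coordinate that match a query. A standalone sketch of that pattern (the
# helper name is illustrative, not part of the original module):
def match_times(query_datetimes, file_time_hours, ref=dt.datetime(1900, 1, 1)):
    #Return indices into file_time_hours at which one of the query datetimes occurs
    hours = np.array([(q - ref).total_seconds() / 3600. for q in query_datetimes])
    return [int(np.where(h == file_time_hours)[0][0]) for h in hours if h in file_time_hours]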
def file_dates(files, query):
    #Return a boolean list with one entry per file, True where the file covers any of the
    # "query" date-times. The forecast start time is parsed from the date-stamped directory
    # in each file path (e.g. .../19900101T0000Z/...), and each file is taken to span from
    # 6 hours to 10 days after that start, in 6-hourly steps.
    is_in = []
    for i in np.arange(len(files)):
        t = dt.datetime.strptime(files[i].split("/")[11][:-1], "%Y%m%dT%H%M")
        t_list = date_seq([t + dt.timedelta(hours=6), t + dt.timedelta(days=10)], "hours", 6)
        if any(np.in1d(query, t_list)):
            is_in.append(True)
        else:
            is_in.append(False)
    return is_in
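#Example usage of file_dates (a minimal sketch; the archive path is the same one used by
# read_barpa below, and the dates are illustrative only):
#
#   query = date_seq([dt.datetime(2005,1,1,0), dt.datetime(2005,1,2,0)], "hours", 6)
#   fl = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/era/erai/r0/"
#           "*/*/pp0/max_wndgust10m*.nc"))
#   fl = fl[file_dates(fl, query)]      #boolean-list indexing of the sorted file array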
def read_merra2(domain,times,pres=True,delta_t=1):
    #Read 3-hourly MERRA2 pressure-level data and hourly surface data
    if len(times) > 1:
        date_list = date_seq(times,"hours",delta_t)
    else:
        date_list = times

    files_3d = []; files_2d = []
    for d in date_list:
        files_3d.append(glob.glob("/g/data/rr7/MERRA2/raw/M2I3NPASM.5.12.4/"+d.strftime("%Y")+\
                "/"+d.strftime("%m")+"/MERRA2*"+d.strftime("%Y%m%d")+"*.nc4")[0])
        files_2d.append(glob.glob("/g/data/ua8/MERRA2/1hr/M2I1NXASM.5.12.4/"+d.strftime("%Y")+\
                "/"+d.strftime("%m")+"/MERRA2*"+d.strftime("%Y%m%d")+"*.nc4")[0])
    files_3d = np.unique(files_3d)
    files_2d = np.unique(files_2d)

    f3d = xr.open_mfdataset(files_3d, combine="by_coords").sel({"time":date_list,
            "lev":slice(1000,100), "lon":slice(domain[2], domain[3]),
            "lat":slice(domain[0], domain[1])})
    f2d = xr.open_mfdataset(files_2d, combine="by_coords").sel({"time":date_list,
            "lon":slice(domain[2], domain[3]), "lat":slice(domain[0], domain[1])})

    ta_file = f3d["T"]; z_file = f3d["H"]; ua_file = f3d["U"]; va_file = f3d["V"]; hur_file = f3d["RH"]
    uas_file = f2d["U10M"]; vas_file = f2d["V10M"]; hus_file = f2d["QV2M"]; tas_file = f2d["T2M"]; ps_file = f2d["PS"]

    #Convert units: temperatures to degC, surface pressure to hPa, RH from fractional to percent
    ta = ta_file.values - 273.15
    ua = ua_file.values
    va = va_file.values
    hgt = z_file.values
    hur = hur_file.values * 100
    hur[hur<0] = 0
    hur[hur>100] = 100
    dp = get_dp(ta,hur)
    uas = uas_file.values
    vas = vas_file.values
    tas = tas_file.values - 273.15
    ps = ps_file.values / 100
    #NB the argument order (specific humidity, temperature, pressure) matches the MetPy
    # version used here; newer MetPy releases expect (pressure, temperature, specific humidity)
    ta2d = np.array(mpcalc.dewpoint_from_specific_humidity(hus_file.values, tas*units.degC,\
            ps*units.hectopascal))
    terrain = f3d["PHIS"].isel({"time":0}).values / 9.8
    lon = f2d["lon"].values
    lat = f2d["lat"].values
    p = f3d["lev"].values

    return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,lon,lat,date_list]
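#Example call (a sketch; assumes access to the rr7/ua8 MERRA2 archives on NCI). The domain
# convention throughout this module is [lat_min, lat_max, lon_min, lon_max]:
#
#   ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,lon,lat,date_list = read_merra2(
#           [-45, -10, 110, 160],
#           [dt.datetime(2010,1,1,0), dt.datetime(2010,1,2,0)])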
def read_erai_points(points,times):
    #Open ERA-Interim netcdf files and extract variables needed for a range of
    # times at a given set of spatial points

    #Format dates and times
    ref = dt.datetime(1900,1,1,0,0,0)
    date_list = date_seq(times,"hours",6)
    formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    #Get time-invariant pressure and spatial info
    no_p, pres, p_ind = get_pressure(100)
    lon,lat = get_lat_lon()
    [lon_ind, lat_ind, lon_used, lat_used] = get_lat_lon_inds(points,lon,lat)
    terrain_new = reform_terrain(lon,lat)

    #Initialise arrays for each variable
    ta = np.empty((len(date_list),no_p,len(points)))
    dp = np.empty((len(date_list),no_p,len(points)))
    hur = np.empty((len(date_list),no_p,len(points)))
    hgt = np.empty((len(date_list),no_p,len(points)))
    p = np.empty((len(date_list),no_p,len(points)))
    ua = np.empty((len(date_list),no_p,len(points)))
    va = np.empty((len(date_list),no_p,len(points)))
    uas = np.empty((len(date_list),len(points)))
    vas = np.empty((len(date_list),len(points)))
    ps = np.empty((len(date_list),len(points)))

    for date in unique_dates:
        print(date)

        #Load ERA-Interim reanalysis files
        ta_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ta/"\
                +"ta_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        z_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/z/"\
                +"z_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        ua_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ua/"\
                +"ua_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        va_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/va/"\
                +"va_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        hur_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/hur/"\
                +"hur_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        uas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/uas/"\
                +"uas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        vas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/vas/"\
                +"vas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        ps_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ps/"\
                +"ps_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])

        #Get times to load in from the file
        times = ta_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
        date_ind = np.where(np.array(formatted_dates) == date)[0]

        #Load data for each spatial point given to the function
        for point in np.arange(0,len(points)):
            ta[date_ind,:,point] = ta_file["ta"][time_ind,p_ind,lat_ind[point],lon_ind[point]] - 273.15
            ua[date_ind,:,point] = ua_file["ua"][time_ind,p_ind,lat_ind[point],lon_ind[point]]
            va[date_ind,:,point] = va_file["va"][time_ind,p_ind,lat_ind[point],lon_ind[point]]
            hgt[date_ind,:,point] = z_file["z"][time_ind,p_ind,lat_ind[point],lon_ind[point]] / 9.8
            hur[date_ind,:,point] = hur_file["hur"][time_ind,p_ind,lat_ind[point],lon_ind[point]]
            hur[hur<0] = 0
            dp[date_ind,:,point] = get_dp(ta[date_ind,:,point],hur[date_ind,:,point])
            uas[date_ind,point] = uas_file["uas"][time_ind,lat_ind[point],lon_ind[point]]
            vas[date_ind,point] = vas_file["vas"][time_ind,lat_ind[point],lon_ind[point]]
            ps[date_ind,point] = ps_file["ps"][time_ind,lat_ind[point],lon_ind[point]] / 100
            p[date_ind,:,point] = pres[p_ind]

        ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
        uas_file.close();vas_file.close();ps_file.close()

    #Save lat/lon/terrain as arrays over the points
    lon = np.empty((len(points)))
    lat = np.empty((len(points)))
    terrain = np.empty((len(points)))
    for point in np.arange(0,len(points)):
        lon[point] = points[point][0]
        lat[point] = points[point][1]
        terrain[point] = terrain_new[lat_ind[point],lon_ind[point]]

    return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,lon,lat,lon_used,lat_used,date_list]
def read_erai(domain,times):
    #Open ERA-Interim netcdf files and extract variables needed for a range of times
    # and a given spatial domain. Optionally, use a single time (including the hour).

    ref = dt.datetime(1900,1,1,0,0,0)
    if len(times) > 1:
        date_list = date_seq(times,"hours",6)
    else:
        date_list = times
    formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    #If the first time is within the first few hours of a month, the previous month's
    # 3-hourly forecast file is also needed
    if (date_list[0].day==1) & (date_list[0].hour<3):
        fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
    else:
        fc_unique_dates = np.copy(unique_dates)

    #Get time-invariant pressure and spatial info
    no_p, p, p_ind = get_pressure(100)
    p = p[p_ind]
    lon,lat = get_lat_lon()
    lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
    lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
    lon = lon[lon_ind]
    lat = lat[lat_ind]
    terrain = reform_terrain(lon,lat)

    #Initialise arrays for each variable
    ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    uas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
    vas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
    ps = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
    cp = np.zeros(ps.shape) * np.nan
    tp = np.zeros(ps.shape) * np.nan
    cape = np.zeros(ps.shape) * np.nan
    wg10 = np.zeros(ps.shape) * np.nan
    tas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
    ta2d = np.empty((len(date_list),len(lat_ind),len(lon_ind)))

    for date in unique_dates:

        #Load ERA-Interim analysis files
        ta_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ta/"\
                +"ta_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        z_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/z/"\
                +"z_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        wap_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/wap/"\
                +"wap_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        ua_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ua/"\
                +"ua_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        va_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/va/"\
                +"va_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        hur_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/hur/"\
                +"hur_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
        uas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/uas/"\
                +"uas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        vas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/vas/"\
                +"vas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        ta2d_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ta2d/"\
                +"ta2d_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        tas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/tas/"\
                +"tas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
        ps_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ps/"\
                +"ps_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])

        #Get times to load in from the file
        times = ta_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
        date_ind = np.where(np.array(formatted_dates) == date)[0]

        #Load analysis data
        ta[date_ind,:,:,:] = ta_file["ta"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
        wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
        ua[date_ind,:,:,:] = ua_file["ua"][time_ind,p_ind,lat_ind,lon_ind]
        va[date_ind,:,:,:] = va_file["va"][time_ind,p_ind,lat_ind,lon_ind]
        hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
        hur[date_ind,:,:,:] = hur_file["hur"][time_ind,p_ind,lat_ind,lon_ind]
        hur[hur<0] = 0
        hur[hur>100] = 100
        dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
        uas[date_ind,:,:] = uas_file["uas"][time_ind,lat_ind,lon_ind]
        vas[date_ind,:,:] = vas_file["vas"][time_ind,lat_ind,lon_ind]
        tas[date_ind,:,:] = tas_file["tas"][time_ind,lat_ind,lon_ind] - 273.15
        ta2d[date_ind,:,:] = ta2d_file["ta2d"][time_ind,lat_ind,lon_ind] - 273.15
        ps[date_ind,:,:] = ps_file["ps"][time_ind,lat_ind,lon_ind] / 100

        ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
        uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close()
        wap_file.close()

    for date in fc_unique_dates:
        #Forecast variables are only available from 1979 onwards in this archive
        if int(date) >= 197900:
            tp_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/tp/"\
                    +"tp_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
            cp_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cp/"\
                    +"cp_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
            cape_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cape/"\
                    +"cape_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
            wg10_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/wg10/"\
                    +"wg10_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])

            #Load forecast data
            fc_times = nc.num2date(cp_file["time"][:], cp_file["time"].units)
            #an_times = nc.num2date(ps_file["time"][time_ind], ps_file["time"].units)
            an_times = date_list
            fc_cp = cp_file.variables["cp"][:,lat_ind,lon_ind]
            fc_tp = tp_file.variables["tp"][:,lat_ind,lon_ind]
            fc_cape = cape_file.variables["cape"][:,lat_ind,lon_ind]
            fc_wg10 = wg10_file.variables["wg10"][:,lat_ind,lon_ind]
            cnt = 0
            for an_t in an_times:
                try:
                    fc_ind = np.where(an_t == np.array(fc_times))[0][0]
                    #Precipitation is accumulated from the forecast start, so difference
                    # consecutive 3-hourly steps and convert from m to mm
                    cp[cnt] = ((fc_cp[fc_ind] - fc_cp[fc_ind - 1]) * 1000.)
                    tp[cnt] = ((fc_tp[fc_ind] - fc_tp[fc_ind - 1]) * 1000.)
                    cape[cnt] = (fc_cape[fc_ind])
                    wg10[cnt] = (fc_wg10[fc_ind])
                except IndexError:
                    pass
                cnt = cnt + 1

            cp_file.close(); cape_file.close(); wg10_file.close(); tp_file.close()

    return [ta,dp,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,lon,lat,date_list]
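#The forecast block above differences consecutive 3-hourly accumulation steps and converts
# from m to mm. A standalone sketch of that de-accumulation step (the helper name is
# illustrative, not part of the original module):
def deaccumulate(accum, scale=1000.):
    #accum: array of accumulated totals with time on axis 0 (e.g. metres of precipitation).
    #Returns per-interval totals, scaled (e.g. to mm); the first interval is kept as-is,
    # since there is no earlier step to difference against.
    accum = np.asarray(accum, dtype=float)
    out = np.empty_like(accum)
    out[0] = accum[0] * scale
    out[1:] = np.diff(accum, axis=0) * scale
    return out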
def read_era5(domain,times,pres=True,delta_t=1):
    #Open ERA5 netcdf files and extract variables needed for a range of times
    # and a given spatial domain

    ref = dt.datetime(1900,1,1,0,0,0)
    if len(times) > 1:
        date_list = date_seq(times,"hours",delta_t)
    else:
        date_list = times
    formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    if (date_list[0].day==1) & (date_list[0].hour<3):
        fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
    else:
        fc_unique_dates = np.copy(unique_dates)

    #Get time-invariant pressure and spatial info
    no_p, p, p_ind = get_pressure(100)
    p = p[p_ind]
    lon,lat = get_lat_lon()
    lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
    lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
    lon = lon[lon_ind]
    lat = lat[lat_ind]
    terrain = reform_terrain(lon,lat)
    sfc_lon,sfc_lat = get_lat_lon_sfc()
    sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
    sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
    sfc_lon = sfc_lon[sfc_lon_ind]
    sfc_lat = sfc_lat[sfc_lat_ind]

    #Initialise arrays for each variable
    if pres:
        ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    cp = np.zeros(ps.shape) * np.nan
    cape = np.zeros(ps.shape) * np.nan
    wg10 = np.zeros(ps.shape) * np.nan
    tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

    for date in unique_dates:

        #Load ERA5 reanalysis files
        if pres:
            ta_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/t/"+date[0:4]+\
                    "/t_era5_aus_"+date+"*.nc")[0])
            z_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/z/"+date[0:4]+\
                    "/z_era5_aus_"+date+"*.nc")[0])
            ua_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/u/"+date[0:4]+\
                    "/u_era5_aus_"+date+"*.nc")[0])
            va_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/v/"+date[0:4]+\
                    "/v_era5_aus_"+date+"*.nc")[0])
            hur_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/r/"+date[0:4]+\
                    "/r_era5_aus_"+date+"*.nc")[0])
        uas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/u10/"+date[0:4]+\
                "/u10_era5_global_"+date+"*.nc")[0])
        vas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/v10/"+date[0:4]+\
                "/v10_era5_global_"+date+"*.nc")[0])
        ta2d_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/d2m/"+date[0:4]+\
                "/d2m_era5_global_"+date+"*.nc")[0])
        tas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/t2m/"+date[0:4]+\
                "/t2m_era5_global_"+date+"*.nc")[0])
        ps_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/sp/"+date[0:4]+\
                "/sp_era5_global_"+date+"*.nc")[0])
        cape_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/cape/"+date[0:4]+\
                "/cape_era5_global_"+date+"*.nc")[0])
        cp_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/cp/"+date[0:4]+\
                "/cp_era5_global_"+date+"*.nc")[0])
        wg10_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/fg10/"+date[0:4]+\
                "/fg10_era5_global_"+date+"*.nc")[0])

        #Get times to load in from the file
        if pres:
            times = ta_file["time"][:]
        else:
            times = uas_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
        date_ind = np.where(np.array(formatted_dates) == date)[0]

        #Get times to load in from the forecast files (wg10 and cp)
        fc_times = cp_file["time"][:]
        fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

        #Load analysis data
        if pres:
            ta[date_ind,:,:,:] = ta_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
            #wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
            ua[date_ind,:,:,:] = ua_file["u"][time_ind,p_ind,lat_ind,lon_ind]
            va[date_ind,:,:,:] = va_file["v"][time_ind,p_ind,lat_ind,lon_ind]
            hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
            hur[date_ind,:,:,:] = hur_file["r"][time_ind,p_ind,lat_ind,lon_ind]
            hur[hur<0] = 0
            hur[hur>100] = 100
            dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
        uas[date_ind,:,:] = uas_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        vas[date_ind,:,:] = vas_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        tas[date_ind,:,:] = tas_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ta2d[date_ind,:,:] = ta2d_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ps[date_ind,:,:] = ps_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100

        #Load forecast data at the analysis times that appear in the forecast files
        fc_date_ind = np.in1d(date_list, nc.num2date(cp_file["time"][fc_time_ind],
                cp_file["time"].units))
        cp[fc_date_ind,:,:] = cp_file["cp"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
        cape[fc_date_ind,:,:] = cape_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
        wg10[fc_date_ind,:,:] = wg10_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]

        if pres:
            ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
        uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close()
        cape_file.close();cp_file.close();wg10_file.close()

    if pres:
        #Flip the pressure dimension
        p = np.flip(p)
        ta = np.flip(ta, axis=1)
        dp = np.flip(dp, axis=1)
        hur = np.flip(hur, axis=1)
        hgt = np.flip(hgt, axis=1)
        ua = np.flip(ua, axis=1)
        va = np.flip(va, axis=1)
        return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,wg10,cape,lon,lat,date_list]
    else:
        return [ps,uas,vas,tas,ta2d,cp,wg10,cape,sfc_lon,sfc_lat,date_list]
def read_era5_cds(pres_path, sfc_path, domain, times, delta_t=1):
    #Read data downloaded from the ERA5 CDS (Copernicus Data Store). This function takes
    # the file paths of the pressure-level and surface-level files

    ref = dt.datetime(1900,1,1,0,0,0)
    if len(times) > 1:
        date_list = date_seq(times,"hours",delta_t)
    else:
        date_list = times
    formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    if (date_list[0].day==1) & (date_list[0].hour<3):
        fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
    else:
        fc_unique_dates = np.copy(unique_dates)

    #Get time-invariant pressure and spatial info
    pres_ds = xr.open_dataset(pres_path)
    p = pres_ds.level.values
    p_ind = p>=100
    p = p[p_ind]
    no_p = len(p)
    lon = pres_ds.longitude.values
    lat = pres_ds.latitude.values
    lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
    lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
    lon = lon[lon_ind]
    lat = lat[lat_ind]
    sfc_ds = xr.open_dataset(sfc_path)
    sfc_lon = sfc_ds.longitude.values
    sfc_lat = sfc_ds.latitude.values
    sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
    sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
    sfc_lon = sfc_lon[sfc_lon_ind]
    sfc_lat = sfc_lat[sfc_lat_ind]

    #Initialise arrays for each variable
    ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    cp = np.zeros(ps.shape) * np.nan
    tp = np.zeros(ps.shape) * np.nan
    cape = np.zeros(ps.shape) * np.nan
    wg10 = np.zeros(ps.shape) * np.nan
    tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

    for date in unique_dates:

        #Load the ERA5 CDS files
        pres_file = nc.Dataset(pres_path)
        sfc_file = nc.Dataset(sfc_path)
        #Sum hourly precipitation over delta_t-hourly intervals, labelled by the right
        # (interval-ending) time
        cp_file = xr.open_dataset(sfc_path).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind})\
                .resample(indexer={"time":str(delta_t)+"H"},\
                label="right",closed="right").sum("time")["cp"][1:,:,:]
        tp_file = xr.open_dataset(sfc_path).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind})\
                .resample(indexer={"time":str(delta_t)+"H"},\
                label="right",closed="right").sum("time")["tp"][1:,:,:]

        #Get times to load in from the file
        times = pres_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
        date_ind = np.where(np.array(formatted_dates) == date)[0]

        #Get times to load in from the forecast variables (wg10 and cape)
        fc_times = sfc_file["time"][:]
        fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

        #Get times to load in from the precip variables (cp and tp)
        tp_time_ind = np.in1d(tp_file.time, [np.datetime64(date_list[i]) for i in np.arange(len(date_list))])

        #Load analysis data
        ta[date_ind,:,:,:] = pres_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
        ua[date_ind,:,:,:] = pres_file["u"][time_ind,p_ind,lat_ind,lon_ind]
        va[date_ind,:,:,:] = pres_file["v"][time_ind,p_ind,lat_ind,lon_ind]
        hgt[date_ind,:,:,:] = pres_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
        hur[date_ind,:,:,:] = pres_file["r"][time_ind,p_ind,lat_ind,lon_ind]
        hur[hur<0] = 0
        hur[hur>100] = 100
        dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
        uas[date_ind,:,:] = sfc_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        vas[date_ind,:,:] = sfc_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        tas[date_ind,:,:] = sfc_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ta2d[date_ind,:,:] = sfc_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ps[date_ind,:,:] = sfc_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100

        #Load forecast and precip data; the CDS precipitation is in m, so convert to mm
        fc_date_ind = np.in1d(date_list, nc.num2date(sfc_file["time"][fc_time_ind],
                sfc_file["time"].units))
        tp_date_ind = np.in1d([np.datetime64(date_list[i]) for i in np.arange(len(date_list))],
                tp_file.time.values)
        cp[tp_date_ind,:,:] = cp_file.isel({"time":tp_time_ind}).values * 1000
        tp[tp_date_ind,:,:] = tp_file.isel({"time":tp_time_ind}).values * 1000
        cape[fc_date_ind,:,:] = sfc_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
        wg10[fc_date_ind,:,:] = sfc_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
        terrain = sfc_file["z"][0,sfc_lat_ind,sfc_lon_ind] / 9.8

        tp_file.close(); cp_file.close(); sfc_file.close(); pres_file.close()

    #Flip the pressure dimension
    p = np.flip(p)
    ta = np.flip(ta, axis=1)
    dp = np.flip(dp, axis=1)
    hur = np.flip(hur, axis=1)
    hgt = np.flip(hgt, axis=1)
    ua = np.flip(ua, axis=1)
    va = np.flip(va, axis=1)

    return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,lon,lat,date_list]
def read_era5_rt52(domain,times,pres=True,delta_t=1):
    #Open ERA5 netcdf files (from the NCI rt52 archive) and extract variables needed for
    # a range of times and a given spatial domain

    ref = dt.datetime(1900,1,1,0,0,0)
    if len(times) > 1:
        date_list = date_seq(times,"hours",delta_t)
    else:
        date_list = times
    formatted_dates = [format_dates(x) for x in date_list]
    unique_dates = np.unique(formatted_dates)
    time_hours = np.empty(len(date_list))
    for t in np.arange(0,len(date_list)):
        time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

    if (date_list[0].day==1) & (date_list[0].hour<3):
        fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
    else:
        fc_unique_dates = np.copy(unique_dates)

    #Get time-invariant pressure and spatial info
    no_p, p, p_ind = get_pressure(100)
    p = p[p_ind]
    lon,lat = get_lat_lon_rt52()
    lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
    lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
    lon = lon[lon_ind]
    lat = lat[lat_ind]
    terrain = reform_terrain(lon,lat)
    sfc_lon,sfc_lat = get_lat_lon_sfc()
    sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
    sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
    sfc_lon = sfc_lon[sfc_lon_ind]
    sfc_lat = sfc_lat[sfc_lat_ind]

    #Initialise arrays for each variable
    if pres:
        ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
        wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
    uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    sst = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    cp = np.zeros(ps.shape) * np.nan
    tp = np.zeros(ps.shape) * np.nan
    cape = np.zeros(ps.shape) * np.nan
    wg10 = np.zeros(ps.shape) * np.nan
    tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
    ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

    for date in unique_dates:

        #Load ERA5 reanalysis files
        if pres:
            ta_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/t/"+date[0:4]+\
                    "/t_era5_oper_pl_"+date+"*.nc")[0])
            z_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/z/"+date[0:4]+\
                    "/z_era5_oper_pl_"+date+"*.nc")[0])
            ua_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/u/"+date[0:4]+\
                    "/u_era5_oper_pl_"+date+"*.nc")[0])
            va_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/v/"+date[0:4]+\
                    "/v_era5_oper_pl_"+date+"*.nc")[0])
            hur_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/r/"+date[0:4]+\
                    "/r_era5_oper_pl_"+date+"*.nc")[0])
        uas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10u/"+date[0:4]+\
                "/10u_era5_oper_sfc_"+date+"*.nc")[0])
        vas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10v/"+date[0:4]+\
                "/10v_era5_oper_sfc_"+date+"*.nc")[0])
        sst_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/sst/"+date[0:4]+\
                "/sst_era5_oper_sfc_"+date+"*.nc")[0])
        ta2d_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/2d/"+date[0:4]+\
                "/2d_era5_oper_sfc_"+date+"*.nc")[0])
        tas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/2t/"+date[0:4]+\
                "/2t_era5_oper_sfc_"+date+"*.nc")[0])
        ps_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/sp/"+date[0:4]+\
                "/sp_era5_oper_sfc_"+date+"*.nc")[0])
        cape_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/cape/"+date[0:4]+\
                "/cape_era5_oper_sfc_"+date+"*.nc")[0])
        #Convert the mean convective/total precipitation rates (kg m-2 s-1) to hourly
        # accumulations (mm), then sum over delta_t-hourly intervals labelled by the right
        # (interval-ending) time
        cp_file = (xr.open_dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/mcpr/"+date[0:4]+\
                "/mcpr_era5_oper_sfc_"+date+"*.nc")[0]).isel({"longitude":sfc_lon_ind,
                "latitude":sfc_lat_ind}) * 3600)\
                .resample(indexer={"time":str(delta_t)+"H"},\
                label="right",closed="right").sum("time")["mcpr"][1:,:,:]
        tp_file = (xr.open_dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/mtpr/"+date[0:4]+\
                "/mtpr_era5_oper_sfc_"+date+"*.nc")[0]).isel({"longitude":sfc_lon_ind,
                "latitude":sfc_lat_ind}) * 3600)\
                .resample(indexer={"time":str(delta_t)+"H"},\
                label="right",closed="right").sum("time")["mtpr"][1:,:,:]
        wg10_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10fg/"+date[0:4]+\
                "/10fg_era5_oper_sfc_"+date+"*.nc")[0])

        #Get times to load in from the file
        if pres:
            times = ta_file["time"][:]
        else:
            times = uas_file["time"][:]
        time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
        date_ind = np.where(np.array(formatted_dates) == date)[0]

        #Get times to load in from the forecast file (wg10)
        fc_times = wg10_file["time"][:]
        fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

        #Get times to load in from the precip files (tp)
        tp_time_ind = np.in1d(tp_file.time, [np.datetime64(date_list[i]) for i in np.arange(len(date_list))])

        #Load analysis data
        if pres:
            ta[date_ind,:,:,:] = ta_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
            #wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
            ua[date_ind,:,:,:] = ua_file["u"][time_ind,p_ind,lat_ind,lon_ind]
            va[date_ind,:,:,:] = va_file["v"][time_ind,p_ind,lat_ind,lon_ind]
            hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
            hur[date_ind,:,:,:] = hur_file["r"][time_ind,p_ind,lat_ind,lon_ind]
            hur[hur<0] = 0
            hur[hur>100] = 100
            dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
        uas[date_ind,:,:] = uas_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        vas[date_ind,:,:] = vas_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
        sst[date_ind,:,:] = sst_file["sst"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        tas[date_ind,:,:] = tas_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ta2d[date_ind,:,:] = ta2d_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
        ps[date_ind,:,:] = ps_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100

        #Load forecast and precip data
        fc_date_ind = np.in1d(date_list, nc.num2date(wg10_file["time"][fc_time_ind],
                wg10_file["time"].units))
        tp_date_ind = np.in1d([np.datetime64(date_list[i]) for i in np.arange(len(date_list))],
                tp_file.time.values)
        cp[tp_date_ind,:,:] = cp_file.isel({"time":tp_time_ind}).values
        tp[tp_date_ind,:,:] = tp_file.isel({"time":tp_time_ind}).values
        cape[fc_date_ind,:,:] = cape_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
        wg10[fc_date_ind,:,:] = wg10_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]

        if pres:
            ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
        uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close()
        sst_file.close();cape_file.close();wg10_file.close();cp_file.close();tp_file.close()

    if pres:
        #Flip the pressure dimension
        p = np.flip(p)
        ta = np.flip(ta, axis=1)
        dp = np.flip(dp, axis=1)
        hur = np.flip(hur, axis=1)
        hgt = np.flip(hgt, axis=1)
        ua = np.flip(ua, axis=1)
        va = np.flip(va, axis=1)
        return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,sst,lon,lat,date_list]
    else:
        return [ps,uas,vas,tas,ta2d,cp,tp,wg10,cape,sfc_lon,sfc_lat,date_list]
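#The return signature of read_era5_rt52 depends on the pres keyword (a sketch; assumes
# rt52 archive access, and the dates are illustrative only):
#
#   #With pres=False, only the surface fields are read and returned
#   ps,uas,vas,tas,ta2d,cp,tp,wg10,cape,sfc_lon,sfc_lat,date_list = read_era5_rt52(
#           [-45, -10, 110, 160],
#           [dt.datetime(2010,1,1,0), dt.datetime(2010,1,1,6)], pres=False)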
def to_points_wind_gust(loc_id, points, fname, start_year, end_year):
    #Load daily maximum wind gust data from the BARPA du7 directory, and extract point
    # values, as in to_points_loop()

    from dask.diagnostics import ProgressBar
    import gc
    ProgressBar().register()

    dates = []
    for y in np.arange(start_year, end_year + 1):
        for m in np.arange(1, 13):
            dates.append(dt.datetime(y, m, 1, 12, 0, 0))
    last_date = dt.datetime(y + 1, 1, 1, 12, 0, 0)

    df = pd.DataFrame()
    lsm = xr.open_dataset(
        "/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/static/lnd_mask-BARPA-EASTAUS_12km.nc")

    #Read netcdf data, one month at a time
    for t in np.arange(len(dates)):
        print(dates[t])
        try:
            query_dates = date_seq([dates[t], dates[t + 1] + dt.timedelta(days=-1)], "hours", 24)
        except IndexError:
            #Final month: there is no dates[t + 1], so query up to last_date instead
            query_dates = date_seq([dates[t], last_date + dt.timedelta(days=-1)], "hours", 24)
        wg_files = np.sort(glob.glob(
            "/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/era/erai/r0/*/*/pp0/max_wndgust10m*.nc"))
        wg_files = wg_files[file_dates(wg_files, query_dates)]
        f = drop_duplicates(xr.open_mfdataset(wg_files, concat_dim="time",
                combine="nested")).sel({"time": query_dates})

        #Set up the land-sea mask: mask out ocean (lnd_mask==0) grid points so the
        # nearest-gridpoint search is restricted to land
        lat = f.coords.get("latitude").values
        lon = f.coords.get("longitude").values
        x, y = np.meshgrid(lon, lat)
        x[lsm.lnd_mask == 0] = np.nan
        y[lsm.lnd_mask == 0] = np.nan
        dist_lon = []
        dist_lat = []
        for i in np.arange(len(loc_id)):
            dist = np.sqrt(np.square(x - points[i][0]) + np.square(y - points[i][1]))
            temp_lat, temp_lon = np.unravel_index(np.nanargmin(dist), dist.shape)
            dist_lon.append(temp_lon)
            dist_lat.append(temp_lat)

        temp_df = f["max_wndgust10m"].isel(latitude=xr.DataArray(dist_lat, dims="points"),
                longitude=xr.DataArray(dist_lon, dims="points")).persist().to_dataframe()
        temp_df = temp_df.reset_index()
        temp_df["time"] = pd.DatetimeIndex(temp_df.time) + dt.timedelta(hours=-12)
        for p in np.arange(len(loc_id)):
            temp_df.loc[temp_df.points == p, "loc_id"] = loc_id[p]
        temp_df = temp_df.drop(["points",
                "forecast_period", "forecast_reference_time", "height"], axis=1)
        df = pd.concat([df, temp_df])
        f.close()
        gc.collect()

    df.sort_values(["loc_id", "time"]).to_pickle(
        "/g/data/eg3/ab4502/ExtremeWind/points/" + fname + ".pkl")
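#Example call (a sketch; the station names and output file name are illustrative, and the
# points are (lon, lat) tuples):
#
#   to_points_wind_gust(["Adelaide", "Sydney"],
#           [(138.52, -34.95), (151.21, -33.86)],
#           "barpa_wg10_example", 1990, 1991)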
def read_barpa(domain, time, experiment, forcing_mdl, ensemble):
    #NOTE: Data has been set to zero below the surface pressure,
    # but wrf_parallel doesn't use these levels anyway.
    #TODO: The above statement may be false: -273.15 K values may cause problems for some
    # routines, even if below ground level. These values are masked to NaN below.

    #Create a list of 6-hourly "query" date-times, based on the start and end dates provided
    query_dates = date_seq(time, "hours", 6)

    #Get a list of all BARPA files in the du7 directory, for a given experiment/forcing model
    basepath = "/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
            experiment+"/"+forcing_mdl+"/"+ensemble+"/"
    geopt_files = np.sort(glob.glob(basepath+"*/*/pp2/geop_ht_uv*"))
    hus_files = np.sort(glob.glob(basepath+"*/*/pp2/spec_hum*"))
    ta_files = np.sort(glob.glob(basepath+"*/*/pp2/air_temp*"))
    ua_files = np.sort(glob.glob(basepath+"*/*/pp2/wnd_ucmp*"))
    va_files = np.sort(glob.glob(basepath+"*/*/pp2/wnd_vcmp*"))
    huss_files = np.sort(glob.glob(basepath+"*/*/pp3/qsair_scrn*"))
    dewpt_files = np.sort(glob.glob(basepath+"*/*/pp26/dewpt_scrn*"))
    tas_files = np.sort(glob.glob(basepath+"*/*/pp3/temp_scrn*"))
    uas_files = np.sort(glob.glob(basepath+"*/*/pp3/uwnd10m_b*"))
    vas_files = np.sort(glob.glob(basepath+"*/*/pp3/vwnd10m_b*"))
    ps_files = np.sort(glob.glob(basepath+"*/*/pp26/sfc_pres*"))
    wg_files = np.sort(glob.glob(basepath+"*/*/pp26/wndgust10m*"))

    #Keep only the files that contain the query dates
    geopt_files = geopt_files[file_dates(geopt_files, query_dates)]
    hus_files = hus_files[file_dates(hus_files, query_dates)]
    ta_files = ta_files[file_dates(ta_files, query_dates)]
    ua_files = ua_files[file_dates(ua_files, query_dates)]
    va_files = va_files[file_dates(va_files, query_dates)]
    huss_files = huss_files[file_dates(huss_files, query_dates)]
    dewpt_files = dewpt_files[file_dates(dewpt_files, query_dates)]
    tas_files = tas_files[file_dates(tas_files, query_dates)]
    uas_files = uas_files[file_dates(uas_files, query_dates)]
    vas_files = vas_files[file_dates(vas_files, query_dates)]
    ps_files = ps_files[file_dates(ps_files, query_dates)]
    wg_files = wg_files[file_dates(wg_files, query_dates)]

    #Load these files, dropping duplicate times. The variable "realization" is dropped,
    # as it appears in some streams but not others, and is not used
    def open_barpa(files):
        return drop_duplicates(xr.open_mfdataset(files, concat_dim="time",
                combine="nested", drop_variables=["realization"]))
    geopt_ds = open_barpa(geopt_files)      #m
    hus_ds = open_barpa(hus_files)          #1 (kg/kg?)
    ta_ds = open_barpa(ta_files)            #K
    ua_ds = open_barpa(ua_files)            #m/s
    va_ds = open_barpa(va_files)            #m/s
    huss_ds = open_barpa(huss_files)        #kg/kg
    dewpt_ds = open_barpa(dewpt_files)      #K
    tas_ds = open_barpa(tas_files)          #K
    uas_ds = open_barpa(uas_files)          #m/s
    vas_ds = open_barpa(vas_files)          #m/s
    ps_ds = open_barpa(ps_files)            #Pa
    wg_ds = open_barpa(wg_files)            #m/s

    #Slice to query times and spatial domain, convert to dataarrays, and restrict to
    # pressure levels at or below the 100 hPa level
    lons = slice(domain[2], domain[3])
    lats = slice(domain[0], domain[1])
    plevs = geopt_ds["pressure"] >= 100
    geopt_da = geopt_ds.sel({"time": query_dates, "pressure": plevs,
            "latitude": lats, "longitude": lons})["geop_ht_uv"]
    hus_da = hus_ds.sel({"time": query_dates, "pressure": plevs,
            "latitude": lats, "longitude": lons})["spec_hum_uv"]
    ta_da = ta_ds.sel({"time": query_dates, "pressure": plevs,
            "latitude": lats, "longitude": lons})["air_temp_uv"]
    ua_da = ua_ds.sel({"time": query_dates, "pressure": plevs,
            "latitude": lats, "longitude": lons})["wnd_ucmp_uv"]
    va_da = va_ds.sel({"time": query_dates, "pressure": plevs,
            "latitude": lats, "longitude": lons})["wnd_vcmp_uv"]
    huss_da = huss_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["qsair_scrn"]
    dewpt_da = dewpt_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["dewpt_scrn"]
    tas_da = tas_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["temp_scrn"]
    uas_da = uas_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["uwnd10m_b"]
    vas_da = vas_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["vwnd10m_b"]
    ps_da = ps_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["sfc_pres"]
    wg_da = wg_ds.sel({"time": query_dates, "latitude": lats, "longitude": lons})["wndgust10m"]

    #As in read_cmip, make sure that all data arrays have the same times (take the
    # intersection of the sets of times). If any data array is reduced to size=0 on the
    # time dimension, throw an error
    common_dates = np.array(list(set(hus_da.time.values) & set(ta_da.time.values)\
            & set(ua_da.time.values) & set(va_da.time.values) & set(huss_da.time.values)\
            & set(tas_da.time.values) & set(uas_da.time.values) & set(vas_da.time.values)\
            & set(ps_da.time.values) & set(geopt_da.time.values) & set(wg_da.time.values)\
            & set(dewpt_da.time.values)))
    geopt_da = geopt_da.isel({"time": np.in1d(geopt_da.time, common_dates)})
    hus_da = hus_da.isel({"time": np.in1d(hus_da.time, common_dates)})
    ta_da = ta_da.isel({"time": np.in1d(ta_da.time, common_dates)})
    ua_da = ua_da.isel({"time": np.in1d(ua_da.time, common_dates)})
    va_da = va_da.isel({"time": np.in1d(va_da.time, common_dates)})
    huss_da = huss_da.isel({"time": np.in1d(huss_da.time, common_dates)})
    dewpt_da = dewpt_da.isel({"time": np.in1d(dewpt_da.time, common_dates)})
    tas_da = tas_da.isel({"time": np.in1d(tas_da.time, common_dates)})
    uas_da = uas_da.isel({"time": np.in1d(uas_da.time, common_dates)})
    vas_da = vas_da.isel({"time": np.in1d(vas_da.time, common_dates)})
    ps_da = ps_da.isel({"time": np.in1d(ps_da.time, common_dates)})
    wg_da = wg_da.isel({"time": np.in1d(wg_da.time, common_dates)})
    for da in [geopt_da, hus_da, ta_da, ua_da, va_da, huss_da, dewpt_da, tas_da,
            uas_da, vas_da, ps_da, wg_da]:
        if len(da.time.values) == 0:
            varname = da.attrs["standard_name"]
            raise ValueError("ERROR: " + varname + " HAS BEEN SLICED IN TIME DIMENSION TO SIZE=0")

    #Settings for linear interpolation, extrapolating outside the grid
    kwargs = {"fill_value": None, "bounds_error": False}
    #Optionally, linearly interpolate the pressure-level data to match the BARRA pressure levels:
    #barra_levs = [100.0000000001, 150.0000000001, 175.0000000001,
    #       200.0000000001, 225.0000000001, 250.0000000001, 275.0000000001,
    #       300.0000000001, 350.0000000001, 400.0000000001, 450.0000000001,
    #       500.0000000001, 600.0000000001, 700.0000000001, 750.0000000001,
    #       800.0000000001, 850.0000000001, 900.0000000001, 925.0000000001,
    #       950.0000000001, 975.0000000001, 1000.0000000001]
    #geopt_da = geopt_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #hus_da = hus_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #ta_da = ta_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #ua_da = ua_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #va_da = va_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)

    #Linearly interpolate the surface variables onto the same lat/lon grid (the
    # pressure-level U/V grid), extrapolating to staggered values outside the grid
    def regrid(da):
        return da.interp(coords={"latitude": hus_da.latitude, "longitude": hus_da.longitude},
                method="linear", kwargs=kwargs)
    huss_da = regrid(huss_da)
    dewpt_da = regrid(dewpt_da)
    tas_da = regrid(tas_da)
    uas_da = regrid(uas_da)
    vas_da = regrid(vas_da)
    ps_da = regrid(ps_da)
    wg_da = regrid(wg_da)

    #Get numpy arrays of everything, and convert temperatures to degC and surface pressure to hPa
    geopt = geopt_da.values
    hus = hus_da.values
    ta = ta_da.values - 273.15
    ua = ua_da.values
    va = va_da.values
    huss = huss_da.values
    dewpt = dewpt_da.values - 273.15
    tas = tas_da.values - 273.15
    uas = uas_da.values
    vas = vas_da.values
    ps = ps_da.values / 100.
    wg = wg_da.values

    #Mask -273.15 degC (0 K) values; these should only be values below the surface
    mask = (ta == (-273.15))
    geopt[mask] = np.nan
    hus[mask] = np.nan
    ta[mask] = np.nan
    ua[mask] = np.nan
    va[mask] = np.nan

    #Create a 3d pressure variable
    p = np.moveaxis(np.tile(hus_da.pressure.values, [ta.shape[2], ta.shape[3], 1]), 2, 0)

    #Get hur from hus, ta and the 3d pressure. NB the argument order matches the MetPy
    # version used here
    hur = np.array(mpcalc.relative_humidity_from_specific_humidity(hus,\
            ta*units.degC, p*units.hectopascal) * 100)
    hur[hur < 0] = 0
    hur[hur > 100] = 100

    #Load terrain data
    terrain = xr.open_dataset("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/static/topog-BARPA-EASTAUS_12km.nc")\
            .sel({"latitude":lats, "longitude":lons})["topog"].values

    #Get lat/lon
    lat = hus_da.latitude.values
    lon = hus_da.longitude.values

    #Flip the pressure dimension
    ta = np.flip(ta, axis=1)
    hur = np.flip(hur, axis=1)
    geopt = np.flip(geopt, axis=1)
    p = np.flip(p, axis=0)
    ua = np.flip(ua, axis=1)
    va = np.flip(va, axis=1)

    #Return times from one of the data arrays (they are identical in time). If any of the
    # query dates were not loaded, throw a warning
    query_times = pd.to_datetime(query_dates)
    times = pd.to_datetime(huss_da.time.values)
    if not all(np.in1d(query_times, times)):
        message = "\n".join(query_times[~np.in1d(query_times, times)].strftime("%Y%m%d %H:%M"))
        warnings.warn("WARNING: The following query dates were not loaded...\n" + message)

    #Format times for output (datetime objects)
    out_times = [dt.datetime.strptime(
            huss_da.time.dt.strftime("%Y-%m-%d %H:%M").values[i], "%Y-%m-%d %H:%M")
            for i in np.arange(huss_da.time.shape[0])]

    return [ta, hur, geopt, terrain, p[:,0,0], ps, ua, va, uas, vas, tas, dewpt, wg,\
            lon, lat, out_times]
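if __name__ == "__main__":
    #Minimal demonstration of read_barpa (a sketch; requires access to the du7 BARPA
    # archive on NCI, and the dates are illustrative). The experiment/forcing/ensemble
    # names follow the directory structure used in to_points_wind_gust() above.
    out = read_barpa([-45, -10, 110, 160],
            [dt.datetime(2005, 1, 1, 0), dt.datetime(2005, 1, 2, 0)],
            "era", "erai", "r0")
    ta, hur, geopt, terrain, p, ps, ua, va, uas, vas, tas, dewpt, wg, lon, lat, out_times = out
    print("Loaded", len(out_times), "times on", len(p), "pressure levels")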