def osg_data(runDATE, XY_point_pairs, variable='GUST:surface', fxx=range(19), percentile=95):
    '''
    Return a dictionary of HRRR climatology from OSG statistics for a series
    of points.

    Input:
        runDATE        - Python Datetime Object for the model run date
                         interested in.
        XY_point_pairs - XY array coordinates (index pairs into the HRRR grid)
                         for the points you want.
        variable       - HRRR variable name.
        fxx            - Forecasts to include in the output.
        percentile     - The percentile (from the file) you want to get. It is
                         read from the dataset variable named 'pNN'.

    Return:
        Dictionary with keys 'lat', 'lon', 'percentile', and one 'fNN' key per
        requested forecast hour holding the percentile value at each point.
    '''
    ## First, get the latitude and longitude grid points that correspond to
    ## the XY points.
    # Load the HRRR latitude and longitude grid
    hLATLON = get_hrrr_latlon()

    # Get the latitude and longitude values for each XY point pair
    ulon = np.array([hLATLON['lon'][m, n] for m, n in XY_point_pairs])
    ulat = np.array([hLATLON['lat'][m, n] for m, n in XY_point_pairs])

    # We want to return these
    return_this = {'lat': ulat, 'lon': ulon, 'percentile': percentile}

    # These do not depend on the forecast hour, so build them once.
    var = variable.replace(':', '_').replace(' ', '_')
    DIR = '/uufs/chpc.utah.edu/common/home/horel-group8/blaylock/HRRR_OSG/hourly30/%s/' % var
    percentile_key = 'p%02d' % percentile

    ## Next, get the statistics for each of the points
    for f in fxx:
        print('\rworking on f%02d' % f, end="")

        # The file name only encodes month, day, and hour of the valid time.
        # NOTE(review): the fixed year 2016 is presumably used because it is
        # a leap year, so Feb 29 valid times can be represented -- confirm.
        validDATE = runDATE + timedelta(hours=f)
        DATE = datetime(2016, validDATE.month, validDATE.day, validDATE.hour)

        FILE = 'OSG_HRRR_%s_m%02d_d%02d_h%02d_f00.h5' % (var, DATE.month, DATE.day, DATE.hour)
        print(DIR + FILE)

        # Use the dataset as a context manager so the file handle is released
        # after each forecast hour instead of leaking one handle per loop.
        with xarray.open_dataset(DIR + FILE) as OSG:
            # Fetch the percentile grid once, then sample every point from it.
            grid = OSG[percentile_key].data
            OSG_pth = np.array([grid[m, n] for m, n in XY_point_pairs])

        # Add that to the dictionary we want to return
        return_this['f%02d' % f] = OSG_pth

    print('\rDONE!')
    return return_this
def load_lats_lons(model):
    """
    Preload the latitude and longitude grid for a HRRR model.

    Input:
        model - Model name. 'hrrr' and 'hrrrX' use the grid returned by
                get_hrrr_latlon(); 'hrrrak' takes the grid from a sample
                'TMP:2 m' field of the 2018-02-24 15:00 run.

    Return:
        A two-item list, [lats, lons].

    Raises:
        ValueError - if `model` is not one of the names above.
    """
    if model in ('hrrr', 'hrrrX'):
        lats, lons = get_hrrr_latlon(DICT=False)
    elif model == 'hrrrak':
        # The Alaska grid is read from the lat/lon returned with a variable.
        AK = get_hrrr_variable(datetime(2018, 2, 24, 15), 'TMP:2 m',
                               fxx=0, model='hrrrak', verbose=False)
        lats = AK['lat']
        lons = AK['lon']
    else:
        # Previously this fell through and failed with an UnboundLocalError
        # on the return statement; fail early with a useful message instead.
        raise ValueError(
            "Unknown model %r; expected 'hrrr', 'hrrrX', or 'hrrrak'" % (model,))
    return [lats, lons]
def RMSD(validDATE, variable, FORECASTS=range(19), verbose=True):
    """
    Root-mean-square-difference for a single forecast time.

    Input:
        validDATE - Datetime of the valid time. Each forecast lead time `f`
                    is requested from the run initialized at
                    validDATE - f hours.
                    e.g. datetime(2018, 8, 12, 21), a period of convection
                    over the Coal Hollow Fire.
        variable  - HRRR variable string, e.g. 'CAPE:surface'. For 'UVGRD'
                    variables the 'SPEED' field is used, otherwise 'value'.
        FORECASTS - Forecast lead times (hours) to include.
        verbose   - If True, print the shape of the loaded forecasts array.

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'RMSD', 'variable', 'DATE', and 'normalized RMSD by range'.
    """
    # UVGRD requests carry the grid of interest under 'SPEED'.
    key = 'SPEED' if variable.split(':')[0] == 'UVGRD' else 'value'

    # Load all forecast grids for this time.
    values = [
        get_hrrr_variable(validDATE - timedelta(hours=f), variable, fxx=f,
                          value_only=True, verbose=False)[key]
        for f in FORECASTS
    ]

    # We have to filter out any scalar 'nan' values (a file that does not
    # exist) BEFORE building the array; a mix of grids and scalars is ragged
    # and cannot be stacked.
    forecasts = np.array([i for i in values if np.shape(i) != ()])

    if verbose:
        print(np.shape(forecasts))

    # Differences between each pair of forecasts (don't double count).
    differences_all = np.array([forecasts[i] - forecasts[j]
                                for i in range(len(forecasts))
                                for j in range(i)])
    RMSD_all = np.sqrt(np.mean(differences_all**2, axis=0))

    ## Normalized RMSD, normalized by range (max-min)
    # NOTE: Ranges can't be zero or we will get a divide by zero error, so
    # zero ranges are replaced with nan.
    q100, q00 = np.percentile(forecasts, [100, 0], axis=0)
    maxmin_range = q100 - q00
    maxmin_range[maxmin_range == 0] = np.nan
    nRMSD_range = RMSD_all / maxmin_range

    # Grid Lat/Lon and return info
    latlon = get_hrrr_latlon()
    latlon['RMSD'] = RMSD_all
    latlon['variable'] = variable
    latlon['DATE'] = validDATE
    latlon['normalized RMSD by range'] = nRMSD_range

    return latlon
def forecast_data(runDATE, XY_point_pairs, variable='GUST:surface', fxx=range(19)):
    '''
    Return a dictionary of forecasted values for a series of points

    Input:
        runDATE        - Python Datetime Object for the model run date
                         interested in.
        XY_point_pairs - XY array coordinate pairs (index pairs into the HRRR
                         grid) for the points you want.
        variable       - HRRR variable name.
        fxx            - Forecasts to include in the output.

    Return:
        Dictionary with keys 'lat', 'lon', and one 'fNN' key per requested
        forecast hour holding the forecast value at each point.
    '''
    ## First, get the latitude and longitude grid points that correspond to
    ## the XY points.
    # Load the HRRR latitude and longitude grid
    hLATLON = get_hrrr_latlon()

    # Get the latitude and longitude values for each XY point pair
    ulon = np.array([hLATLON['lon'][m, n] for m, n in XY_point_pairs])
    ulat = np.array([hLATLON['lat'][m, n] for m, n in XY_point_pairs])

    # We want to return these
    return_this = {'lat': ulat, 'lon': ulon}

    ## Next, get the forecasts for each of the points
    for f in fxx:
        print('\rworking on f%02d' % f, end="")

        # Get the HRRR field for the requested variable. (This used to be
        # hard-coded to 'GUST:surface', silently ignoring `variable`.)
        H = get_hrrr_variable(runDATE, variable=variable, fxx=f, verbose=False)

        # Get the points from the grid
        vv = np.array([H['value'][m, n] for m, n in XY_point_pairs])

        # Add that to the dictionary we want to return
        return_this['f%02d' % f] = vv

    print('\rDONE!')
    return return_this
def RMSD_range(sDATE, eDATE, variable, HOURS=[0], FORECASTS=range(19)):
    """
    Compute the RMSD for a range of dates at a given hour.

    Inputs:
        sDATE     - Datetime start (valid Date)
        eDATE     - Datetime end (valid Date). The end day itself is not
                    included; whole days from sDATE up to eDATE are used.
        variable  - HRRR variable string (i.e. 'TMP:2 m')
        HOURS     - the hour(s) you want to include in the RMSD calculation
        FORECASTS - the forecasts you want to include in the RMSD calculation

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'RMSD', 'variable', and 'DATE RANGE'.
    """
    print('============ %s ============' % variable)
    print('      sDATE  : %s' % sDATE)
    print('      eDATE  : %s' % eDATE)
    print('      HOURS  : %s' % HOURS)
    print('    FORCASTS : %s' % FORECASTS)
    print()

    days = (eDATE - sDATE).days
    DAYS = [sDATE + timedelta(days=d) for d in range(days)]
    DATES = [datetime(d.year, d.month, d.day, h) for d in DAYS for h in HOURS]

    # Load all forecast grids for each valid time using multiprocessing.
    # The `with` block guarantees the worker processes are cleaned up even
    # if one of the workers raises.
    cpus = 7
    inputs = [[D, variable, FORECASTS] for D in DATES]
    with multiprocessing.Pool(cpus) as P:
        results = P.map(RMSD_range_MP, inputs)

    # Each worker result is indexed as [count, sum of squares].
    count = np.sum([i[0] for i in results])
    sum_squares = np.sum([i[1] for i in results], axis=0)

    # RMSD Calculation
    RMSD = np.sqrt(sum_squares / count)

    # Grid Lat/Lon and return info
    latlon = get_hrrr_latlon()
    latlon['RMSD'] = RMSD
    latlon['variable'] = variable
    latlon['DATE RANGE'] = [sDATE, eDATE]

    return latlon
mpl.rcParams['xtick.labelsize'] = 10 mpl.rcParams['ytick.labelsize'] = 10 mpl.rcParams['axes.labelsize'] = 12 mpl.rcParams['axes.titlesize'] = 15 mpl.rcParams['lines.linewidth'] = 1.8 mpl.rcParams['grid.linewidth'] = .25 mpl.rcParams['figure.subplot.wspace'] = 0.05 mpl.rcParams['figure.subplot.hspace'] = 0.05 mpl.rcParams['legend.fontsize'] = 10 mpl.rcParams['legend.framealpha'] = .75 mpl.rcParams['legend.loc'] = 'best' mpl.rcParams['savefig.bbox'] = 'tight' mpl.rcParams['savefig.dpi'] = 100 # Build map objects and get HRRR latitude and longitude grids. latlon = get_hrrr_latlon() lat = latlon['lat'] lon = latlon['lon'] m = draw_HRRR_map() # CONUS mW = draw_centermap(40, -115, (10,10)) # West mU = draw_centermap(39.5, -111.6, (3.2,3.2)) # Utah # Variable constants VARS = {'TMP:2 m':{'cmap':'magma', 'vmax':2.5, 'vmin':0, 'label':'2 m Temperature', 'units':'C'}, 'DPT:2 m':{'cmap':'magma', 'vmax':5, 'vmin':0,
def filter_by_path(glm):
    """
    Split GLM observations by the HRRR domain and regional subdomains.

    Inputs:
        glm - the object returned by accumulate_GLM. It is read here through
              the keys 'latitude', 'longitude', 'energy', 'area', and
              'DATETIME'.

    Return:
        None if there are no GLM observations. Otherwise a dictionary with
        'DATETIME' plus one entry per domain ('HRRR', 'West', 'Central',
        'East', 'Utah'), each a dictionary of the 'latitude', 'longitude',
        'energy', and 'area' values that fall inside that domain's boundary.
        'area' is None when glm['area'] holds a single element.
    """
    from matplotlib.path import Path
    from BB_HRRR.HRRR_Pando import get_hrrr_latlon

    # Get HRRR latitude and longitude grids
    Hlat, Hlon = get_hrrr_latlon(DICT=False)

    # =========================================================================
    print('Make domain paths...')
    ## Create Path boundaries of HRRR domain and subdomains of interest:
    # HRRR: All points counter-clockwise around the model domain.
    # West, Central, East: A 16 degree wide and 26 degree tall boundary region.
    # NOTE(review): the first and last latitude of West/Central/East differ
    # (24.4 vs 24.2). The extra vertex lies on the same meridian as the
    # starting vertex; left unchanged -- confirm which value was intended.
    PATH_points = {
        'HRRR': {
            'lon': np.concatenate(
                [Hlon[0], Hlon[:, -1], Hlon[-1][::-1], Hlon[:, 0][::-1]]),
            'lat': np.concatenate(
                [Hlat[0], Hlat[:, -1], Hlat[-1][::-1], Hlat[:, 0][::-1]])
        },
        'West': {
            'lon': [-120, -104, -104, -120, -120],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.2]
        },
        'Central': {
            'lon': [-104, -88, -88, -104, -104],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.2]
        },
        'East': {
            'lon': [-88, -72, -72, -88, -88],
            'lat': [24.4, 24.4, 50.2, 50.2, 24.2]
        },
        'Utah': {
            'lon': [
                -114.041664, -111.047526, -111.045645, -109.051460,
                -109.048632, -114.051534, -114.041664
            ],
            'lat': [
                41.993580, 42.002846, 40.998538, 40.998403, 36.998310,
                37.000574, 41.993580
            ]
        }
    }

    ## Combine lat/lon as vertex pairs, i.e. (lon, lat), and generate a Path
    ## object for each domain.
    PATHS = {}
    for name, points in PATH_points.items():
        verts = np.array(list(zip(points['lon'], points['lat'])))
        PATHS[name] = Path(verts)

    ## Filter GLM observation within the HRRR domain. -------------------------
    # The GLM observes flashes for the disk in its field of view. We only want
    # the flashes within the HRRR domain. This is tricky. We can't just use
    # lat/lon bounds, because the projection of the HRRR model *bends* the
    # lat/lon and we would overshoot corners.
    # Instead, we use the Paths created above and determine which GLM points
    # are inside each Path.

    # Total number of GLM observations
    print("Total GLM observations:", len(glm['latitude']))

    # Generate a (lon, lat) tuple for each point, matching the vertex order.
    print('generate lat/lon pair...')
    GLM_latlon_pair = list(zip(glm['longitude'], glm['latitude']))
    print('...done')

    # Return None if there are no pairs, because there are no flashes.
    if len(GLM_latlon_pair) == 0:
        # This message used to reference an undefined name `DATE`, raising a
        # NameError exactly when there was no data. Wrap the value in a tuple
        # so a list/tuple DATETIME cannot break the %-formatting.
        print('!!! Warning !!! DATE %s had no lightning data' % (glm['DATETIME'],))
        return None

    ## Determine which GLM observation points fall inside each boundary Path.
    print('Find which points are inside path')
    print('')
    inside_path = {}
    for name, path in PATHS.items():
        inside_path[name] = path.contains_points(GLM_latlon_pair)
    print('...done!')

    ## Filter the GLM data keys by the bounding Path (points inside Path).
    # When 'area' holds a single element it cannot be indexed by the mask,
    # so it is reported as None.
    has_area = np.size(glm['area']) != 1
    filtered_glm = {'DATETIME': glm['DATETIME']}
    for name, mask in inside_path.items():
        filtered_glm[name] = {
            'latitude': glm['latitude'][mask],
            'longitude': glm['longitude'][mask],
            'energy': glm['energy'][mask],
            'area': glm['area'][mask] if has_area else None
        }

    return filtered_glm
except: print('%s no MesoWest dew point' % a['NAME']) try: loc['fig'][3].plot(a['DATETIME'], mps_to_MPH(a['wind_speed']), c='k', ls='--', zorder=50) except: print('%s no MesoWest wind speed' % a['NAME']) ## Now, add the element that changes, save the figure for each forecast. # 2.2) Get Radar Reflectivity and winds for entire CONUS for every forecast HH_refc = get_hrrr_all_run(DATE, 'REFC:entire') HH_u, HH_v, HH_spd = get_hrrr_all_run(DATE, 'UVGRD:10 m') Hlat, Hlon = get_hrrr_latlon(DICT=False) # Convert Units (meters per second -> miles per hour) HH_u = mps_to_MPH(np.array(HH_u)) HH_v = mps_to_MPH(np.array(HH_v)) HH_spd = mps_to_MPH(np.array(HH_spd)) for fxx in range(0, 19): for n, (name, loc) in enumerate(location.items()): tz = loc['timezone'] print(" --> Working on: F%02d %s" % (fxx, name)) ## Get grids for this particular forecast time... H_refc = HH_refc[fxx] H_u = HH_u[fxx] H_v = HH_v[fxx] #
def NEP(validDATE, threshold=35, variable='REFC:entire', radius=9, fxx=range(2, 5), hours_span=0):
    '''
    Compute the Neighborhood Ensemble Probability

    Input:
        validDATE  - Datetime object representing the valid date.
        threshold  - The threshold value for which you wish to compute
                     probability.
        variable   - Variable string from the HRRR .idx file
        radius     - The number of grid points the radial spatial filter uses
        fxx        - A list of forecast hours (between 0 and 18) to use in
                     the probability.
        hours_span - Number hours +/- the valid hour to consider as members.
                     Default 0 will only use the validDATE. If you set to 1,
                     then will use additional model runs valid for +/- 1 hour
                     the validDATE.

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'prob'    - the neighborhood ensemble probability grid, with zero
                    probability masked.
        'members' - the number of ensemble members used.
    '''
    # Generate a list of datetimes based on the validDATE and the hours_span.
    # Use total_seconds(): timedelta.seconds drops whole days, so a span of
    # 12 hours or more (a 24+ hour window) would lose most of the dates.
    sDATE = validDATE - timedelta(hours=hours_span)
    eDATE = validDATE + timedelta(hours=hours_span)
    n_hours = int((eDATE - sDATE).total_seconds() / 60 / 60) + 1
    DATES = [sDATE + timedelta(hours=h) for h in range(n_hours)]

    print()
    print('### Neighborhood Ensemble Probability###')
    print('     Valid Dates:\t%s' % DATES)
    print('        Variable:\t%s' % variable)
    print(' Threshold value:\t%s' % threshold)
    print('   Radial filter:\t %s grid points (%s km)' % (radius, (3 * radius)))
    print('  Forecast hours:\t%s' % (['f%02d' % f for f in fxx]))

    ## First, compute the ensemble probability (EP) for each grid point.
    # Retrieve all the HRRR grids as each member.
    # NOTE(review): the last item is `threshold`, where TLE passes `radius`
    # in the same position. Left unchanged because member_multipro is not
    # visible here -- confirm which value it expects.
    inputs_list = [[d, f, threshold, variable, threshold]
                   for d in DATES for f in fxx]

    # Multiprocessing :)
    # The `with` block cleans up the worker processes even if a worker raises.
    cpus = np.minimum(multiprocessing.cpu_count(), len(inputs_list))
    with multiprocessing.Pool(cpus) as p:
        members = np.array(p.map(member_multipro, inputs_list))

    # Average of all members at each grid point, ensemble probability (EP)
    EP = np.mean(members, axis=0)

    ## Second, average EP neighborhood at each point.
    # (Named nep_grid so the local does not shadow this function's name.)
    nep_grid = ndimage.generic_filter(EP, neighborhood_mean,
                                      footprint=radial_footprint(radius))

    print("\n###########################################################")
    print("  Neighborhood Ensemble Probability:")
    print(
        "  Percentage of grid points within the radius %s km (%s grid points)"
        % (radius * 3, radius))
    print("  where %s >= %s" % (variable, threshold))
    print("###########################################################\n")

    ## 3) Return some values
    # Load the latitude and longitude.
    return_this = get_hrrr_latlon()

    # Return the array with zero probability masked
    masked = np.ma.array(nep_grid)
    masked[masked == 0] = np.ma.masked

    return_this['prob'] = masked
    return_this['members'] = len(members)

    return return_this
def TLE(validDATE, threshold=35, variable='REFC:entire', radius=9, fxx=range(2, 5), hours_span=0):
    '''
    Compute the Time-Lagged Ensemble for a variable

    Input:
        validDATE  - Datetime object representing the valid date.
        threshold  - The threshold value for which you wish to compute
                     probability.
        variable   - Variable string from the HRRR .idx file
        radius     - The number of grid points the radial spatial filter uses
        fxx        - A list of forecast hours (between 0 and 18) to use in
                     the probability.
        hours_span - Number hours +/- the valid hour to consider as members.
                     Default 0 will only use the validDATE. If you set to 1,
                     then will use additional model runs valid for +/- 1 hour
                     the validDATE.

    Return:
        The dictionary from get_hrrr_latlon() with these keys added:
        'prob'    - the time-lagged ensemble probability grid, with zero
                    probability masked.
        'members' - the number of ensemble members used.
    '''
    # Generate a list of datetimes based on the validDATE and the hours_span.
    # Use total_seconds(): timedelta.seconds drops whole days, so a span of
    # 12 hours or more (a 24+ hour window) would lose most of the dates.
    sDATE = validDATE - timedelta(hours=hours_span)
    eDATE = validDATE + timedelta(hours=hours_span)
    n_hours = int((eDATE - sDATE).total_seconds() / 60 / 60) + 1
    DATES = [sDATE + timedelta(hours=h) for h in range(n_hours)]

    print()
    print('### Time-Lagged Ensemble###')
    print('     Valid Dates:\t%s' % DATES)
    print('        Variable:\t%s' % variable)
    print(' Threshold value:\t%s' % threshold)
    print('   Radial filter:\t %s grid points (%s km)' % (radius, (3 * radius)))
    print('  Forecast hours:\t%s' % (['f%02d' % f for f in fxx]))

    # Run spatial filter for each member:
    # List of inputs used for multiprocessing for each member
    inputs_list = [[d, f, threshold, variable, radius]
                   for d in DATES for f in fxx]

    # Multiprocessing :)
    # The `with` block cleans up the worker processes (the pool used to be
    # left open), and the results are stacked into an array explicitly
    # rather than relying on list / numpy-scalar coercion below.
    cpus = np.minimum(multiprocessing.cpu_count(), len(inputs_list))
    with multiprocessing.Pool(cpus) as p:
        members = np.array(p.map(member_multipro, inputs_list))

    # The maximum number of points in the radial footprint
    max_points = np.sum(radial_footprint(radius))

    # The TLE probability is the mean over members of each member's value
    # divided by the number of points in the footprint.
    member_mean = np.mean(members / max_points, axis=0)

    # Return the array with zero probability masked
    masked = np.ma.array(member_mean)
    masked[masked == 0] = np.ma.masked

    # Load the latitude and longitude.
    return_this = get_hrrr_latlon()

    # also return the masked probabilities
    return_this['prob'] = masked
    return_this['members'] = len(members)

    return return_this