def run_comparison(var_name, input_date, interpolation_types, rep,
                   loc_dictionary, cvar_dictionary, file_path_elev,
                   elev_array, idx_list, phi_input=None, calc_phi=True,
                   kernels={'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
                            'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
                            'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
                            'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Execute the shuffle-split cross-validation for the given interpolation
    types and interpolate a surface using the best-performing one.

    Parameters
        interpolation_types (dict of str: list of str): interpolation types to
        consider for each weather variable, keyed by variable name

    Returns
        best_method (str): the name of the selected interpolation type
        choix_surf (ndarray): the surface interpolated with the selected type
        maxmin: bounds of the interpolated surface, for plotting
    '''
    MAE_dict = {}
    for method in interpolation_types[var_name]:
        if method not in ['IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4',
                          'TPS', 'GPR', 'RF']:
            print('The method %s is not currently a supported interpolation type.'
                  % (method))
            sys.exit()
        else:
            if method == 'IDW2':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 2, rep, False, res=10000)
                MAE_dict[method] = MAE
            if method == 'IDW3':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 3, rep, False, res=10000)
                MAE_dict[method] = MAE
            if method == 'IDW4':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 4, rep, False, res=10000)
                MAE_dict[method] = MAE
            if method == 'IDEW2':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 2, rep,
                                              res=10000)
                MAE_dict[method] = MAE
            if method == 'IDEW3':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 3, rep,
                                              res=10000)
                MAE_dict[method] = MAE
            if method == 'IDEW4':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 4, rep,
                                              res=10000)
                MAE_dict[method] = MAE
            if method == 'TPS':
                MAE = tps.shuffle_split_tps(loc_dictionary, cvar_dictionary,
                                            shapefile, 10, res=10000)
                MAE_dict[method] = MAE
            if method == 'RF':
                MAE = rf.shuffle_split_rf(loc_dictionary, cvar_dictionary,
                                          shapefile, file_path_elev,
                                          elev_array, idx_list, 10, res=10000)
                MAE_dict[method] = MAE
            if method == 'GPR':
                MAE = gpr.shuffle_split_gpr(loc_dictionary, cvar_dictionary,
                                            shapefile, file_path_elev,
                                            elev_array, idx_list,
                                            kernels[var_name], 10, res=10000)
                MAE_dict[method] = MAE

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # Interpolate the surface using the best method
    if best_method == 'IDW2':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     2, False, res=10000)  # Expand_area is not supported yet
    if best_method == 'IDW3':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     3, False, res=10000)  # Expand_area is not supported yet
    if best_method == 'IDW4':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     4, False, res=10000)  # Expand_area is not supported yet
    if best_method == 'IDEW2':
        choix_surf, maxmin, elev_array = idew.IDEW(loc_dictionary,
                                                   cvar_dictionary, input_date,
                                                   'Variable', shapefile, False,
                                                   file_path_elev, idx_list, 2,
                                                   False, res=10000)  # Expand_area is not supported yet
    if best_method == 'IDEW3':
        choix_surf, maxmin, elev_array = idew.IDEW(loc_dictionary,
                                                   cvar_dictionary, input_date,
                                                   'Variable', shapefile, False,
                                                   file_path_elev, idx_list, 3,
                                                   False, res=10000)
    if best_method == 'IDEW4':
        choix_surf, maxmin, elev_array = idew.IDEW(loc_dictionary,
                                                   cvar_dictionary, input_date,
                                                   'Variable', shapefile, False,
                                                   file_path_elev, idx_list, 4,
                                                   False, res=10000)
    if best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi, res=10000)
    if best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list, False, res=10000)
    if best_method == 'GPR':
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list, False,
            kernels[var_name], 0, False, False, res=10000)

    return best_method, choix_surf, maxmin
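# Hedged usage sketch for run_comparison above (not from the original source):
# the station dictionaries, date string, elevation lookup path, and idx_list
# are invented placeholders, and the module-level `shapefile` plus the
# idw/idew/tps helper modules must already be set up for this to run.
if __name__ == '__main__':
    example_types = {'temp': ['IDW2', 'IDW3', 'TPS']}
    example_locs = {'STATION_A': (49.1, -84.2), 'STATION_B': (50.3, -86.7)}
    example_temp = {'STATION_A': 21.5, 'STATION_B': 19.8}
    # elev_array can be None here because no elevation-aware method
    # (IDEW/RF/GPR) is in the candidate list
    best, surface, bounds = run_comparison(
        'temp', '2018-07-01 13:00', example_types, 10, example_locs,
        example_temp, 'lookup_files/elev.csv', None, [1])
    print('Selected interpolator: %s' % best)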
    if var == 'start':
        # Get two things: start date for each station and the lat lon of the station
        days_dict, latlon_station = fwi.start_date_calendar_csv(
            file_path_daily, year)
    elif var == 'end':
        days_dict, latlon_station = fwi.end_date_calendar_csv(
            file_path_daily, year, 'sep')
    else:
        print('That is not a correct variable!')

    if interpolator == 'IDW2':
        grd_size, maxmin = idw.IDW(latlon_station, days_dict, year,
                                   'End Date (# Days since September 1)',
                                   shapefile, False, 2, True)
        try:
            inBoreal = GD.is_station_in_boreal(latlon_station, days_dict,
                                               boreal_shapefile)
            filtered_dict = {k: v for k, v in days_dict.items()
                             if k in inBoreal}
            # Number of clusters = # stations / 3, / 5, / 10
            num_stations = len(filtered_dict.keys())
            cluster_num1 = int(round(num_stations / 3))
            cluster_num2 = int(round(num_stations / 5))
            cluster_num3 = int(round(num_stations / 10))
            cluster_num, MAE, stdev_stations = idw.select_block_size_IDW(
                10, 'clusters', latlon_station, days_dict, grd_size, shapefile,
def execute_sequential_calc(file_path_hourly, file_path_daily,
                            file_path_daily_csv, loc_dictionary_hourly,
                            loc_dictionary_daily, date_dictionary, year,
                            interpolation_types, rep, file_path_elev,
                            idx_list, save_path, shapefile, shapefile2,
                            phi_input=None, calc_phi=True,
                            kernels={'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
                                     'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
                                     'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
                                     'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Execute the DC, DMC, FFMC sequential calculations

    Parameters
        interpolation_types (dict of str: list of str): interpolation types to
        consider for each weather variable, keyed by variable name

    Returns
        dc_list (list of ndarray): daily DC arrays for the fire season (the
        other FWI codes are serialized to disk)
    '''
    # Fire season start and end dates
    start = time.time()
    # Get two things: start date for each station and the lat lon of the station
    start_dict, latlon_station = fwi.start_date_calendar_csv(
        file_path_daily_csv, year)
    end_dict, latlon_station = fwi.end_date_calendar_csv(
        file_path_daily_csv, year, 'oct')  # start searching from Oct 1

    daysurface, maxmin = idw.IDW(latlon_station, start_dict, year,
                                 '# Days Since March 1', shapefile, False, 3,
                                 False, res=10000)  # Interpolate the start date, IDW3
    endsurface, maxmin = idw.IDW(latlon_station, end_dict, year,
                                 '# Days Since Oct 1', shapefile, False, 3,
                                 False, res=10000)  # Interpolate the end date

    end_dc_vals = np.zeros(endsurface.shape)  # For now, no overwinter procedure

    end = time.time()
    time_elapsed = (end - start) / 60
    print('Finished getting season start & end dates, it took %s minutes'
          % (time_elapsed))

    # Initialize the input elev_array (which is stable)
    placeholder_surf, maxmin, elev_array = idew.IDEW(loc_dictionary_hourly,
                                                     end_dict, 'placeholder',
                                                     'Variable', shapefile,
                                                     False, file_path_elev,
                                                     idx_list, 2, True,
                                                     res=10000)

    # Get the dates in the fire season; overall, the surfaces will take care of masking
    # Get the start date (if it's too early everything will be masked out, so any day before April works)
    sdate = pd.to_datetime(year + '-03-01').date()
    edate = pd.to_datetime(year + '-12-31').date()  # End date, for right now it's Dec 31
    # Get the dates for all the potential days in the season
    # dates = list(pd.date_range(sdate, edate - timedelta(days=1), freq='d'))
    dates = list(pd.date_range(sdate, edate, freq='d'))

    dc_list = []
    dmc_list = []
    ffmc_list = []
    isi_list = []
    bui_list = []
    fwi_list = []
    count = 0
    for input_date in dates:
        print(input_date)
        gc.collect()
        # Get the weather dictionaries
        start = time.time()
        temp = GD.get_noon_temp(str(input_date)[:-3], file_path_hourly)
        rh = GD.get_relative_humidity(str(input_date)[:-3], file_path_hourly)
        wind = GD.get_wind_speed(str(input_date)[:-3], file_path_hourly)
        pcp = GD.get_pcp(str(input_date)[0:10], file_path_daily,
                         date_dictionary)
        end = time.time()
        time_elapsed = end - start
        print('Finished getting weather dictionaries, it took %s seconds'
              % (time_elapsed))

        start = time.time()
        best_interp_temp, choice_surf_temp, maxmin = run_comparison(
            'temp', input_date, interpolation_types, rep,
            loc_dictionary_hourly, temp, file_path_elev, elev_array, idx_list)
        best_interp_rh, choice_surf_rh, maxmin = run_comparison(
            'rh', input_date, interpolation_types, rep, loc_dictionary_hourly,
            rh, file_path_elev, elev_array, idx_list)
        best_interp_wind, choice_surf_wind, maxmin = run_comparison(
            'wind', input_date, interpolation_types, rep,
            loc_dictionary_hourly, wind, file_path_elev, elev_array, idx_list)
        best_interp_pcp, choice_surf_pcp, maxmin = run_comparison(
            'pcp', input_date, interpolation_types, rep, loc_dictionary_daily,
            pcp, file_path_elev, elev_array, idx_list)
        end = time.time()
        time_elapsed = end - start
        print('Finished getting best methods & surfaces, it took %s seconds'
              % (time_elapsed))

        # Get date index information
        year = str(input_date)[0:4]
        index = dates.index(input_date)
        dat = str(input_date)
        day_index = fwi.get_date_index(year, dat, 3)
        eDay_index = fwi.get_date_index(year, dat, 10)

        start = time.time()
        mask1 = fwi.make_start_date_mask(day_index, daysurface)
        if eDay_index < 0:
            endMask = np.ones(endsurface.shape)  # in the case that the index is before Oct 1
        else:
            endMask = fwi.make_end_date_mask(eDay_index, endsurface)

        if count > 0:
            # The last one appended is yesterday's value, but there's a lag
            # because no update was made when count was 0, so use count - 1
            dc_array = dc_list[count - 1]
            dmc_array = dmc_list[count - 1]
            ffmc_array = ffmc_list[count - 1]
            index = count - 1
            dc = fwi.DC(input_date, choice_surf_pcp, choice_surf_rh,
                        choice_surf_temp, choice_surf_wind, maxmin, dc_array,
                        index, False, shapefile, mask1, endMask, None, False)
            dmc = fwi.DMC(input_date, choice_surf_pcp, choice_surf_rh,
                          choice_surf_temp, choice_surf_wind, maxmin,
                          dmc_array, index, False, shapefile, mask1, endMask)
            ffmc = fwi.FFMC(input_date, choice_surf_pcp, choice_surf_rh,
                            choice_surf_temp, choice_surf_wind, maxmin,
                            ffmc_array, index, False, shapefile, mask1,
                            endMask)
            isi = fwi.ISI(ffmc, choice_surf_wind, maxmin, False, shapefile,
                          mask1, endMask)
            bui = fwi.BUI(dmc, dc, maxmin, False, shapefile, mask1, endMask)
            fwi_val = fwi.FWI(isi, bui, maxmin, False, shapefile, mask1,
                              endMask)
            dc_list.append(dc)
            dmc_list.append(dmc)
            ffmc_list.append(ffmc)
            isi_list.append(isi)
            bui_list.append(bui)
            fwi_list.append(fwi_val)
        else:
            rain_shape = choice_surf_pcp.shape
            # Merge with the other overwinter array once it's calculated
            dc_initialize = np.zeros(rain_shape) + 15
            dc_yesterday1 = dc_initialize * mask1
            dc_list.append(dc_yesterday1)  # placeholder

            dmc_initialize = np.zeros(rain_shape) + 6
            dmc_yesterday1 = dmc_initialize * mask1
            dmc_list.append(dmc_yesterday1)  # placeholder

            ffmc_initialize = np.zeros(rain_shape) + 85
            ffmc_yesterday1 = ffmc_initialize * mask1
            ffmc_list.append(ffmc_yesterday1)  # placeholder

        end = time.time()
        time_elapsed = end - start
        print('Finished getting FWI codes for date in stream, it took %s seconds'
              % (time_elapsed))
        count += 1

    # Prep to serialize
    dc_list = [x.tolist() for x in dc_list]
    dmc_list = [x.tolist() for x in dmc_list]
    ffmc_list = [x.tolist() for x in ffmc_list]
    isi_list = [x.tolist() for x in isi_list]
    bui_list = [x.tolist() for x in bui_list]
    fwi_list = [x.tolist() for x in fwi_list]

    with open(save_path + year + '_DC_auto_select.json', 'w') as fp:
        json.dump(dc_list, fp)
    with open(save_path + year + '_DC_auto_select.json', 'r') as fp:
        dc_list = json.load(fp)
    with open(save_path + year + '_DMC_auto_select.json', 'w') as fp:
        json.dump(dmc_list, fp)
    with open(save_path + year + '_FFMC_auto_select.json', 'w') as fp:
        json.dump(ffmc_list, fp)
    with open(save_path + year + '_ISI_auto_select.json', 'w') as fp:
        json.dump(isi_list, fp)
    with open(save_path + year + '_BUI_auto_select.json', 'w') as fp:
        json.dump(bui_list, fp)
    with open(save_path + year + '_FWI_auto_select.json', 'w') as fp:
        json.dump(fwi_list, fp)

    dc_list = [np.array(x) for x in dc_list]  # convert to np array for plotting
    fwi.plot_june(dc_list, maxmin, year, 'DC', shapefile, shapefile2)

    return dc_list
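# Hedged usage sketch for execute_sequential_calc above (not from the
# original source); every path and the station/date dictionaries below are
# invented placeholders that would come from the package's data loaders:
#
#   dc_codes = execute_sequential_calc(
#       'data/hourly/', 'data/daily/', 'data/daily_csv/',
#       hourly_station_locs, daily_station_locs, date_dictionary, '2018',
#       {'temp': ['IDW2'], 'rh': ['IDW2'], 'pcp': ['IDW2'], 'wind': ['IDW2']},
#       10, 'lookup_files/elev.csv', [1], 'outputs/',
#       'study_area/on_qc.shp', 'study_area/boreal.shp')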
def run_comparison(var_name, input_date, interpolation_types, rep,
                   loc_dictionary, cvar_dictionary, file_path_elev,
                   elev_array, idx_list, phi_input=None, calc_phi=True,
                   kernels={'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
                            'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
                            'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
                            'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Execute the shuffle-split cross-validation for the given interpolation
    types and interpolate a surface using the best-performing one.

    Parameters
    ----------
    var_name : string
        name of weather variable you are interpolating
    input_date : string
        date of weather data (day of fire season)
    interpolation_types : list
        list of interpolation types to consider
    rep : int
        number of replications to run
    loc_dictionary : dictionary
        dictionary of station locations
    cvar_dictionary : dictionary
        dictionary containing the weather data for each station available
    file_path_elev : string
        path to the elevation lookup file
    elev_array : ndarray
        array for elevation, created using IDEW interpolation (this is a
        trick to speed up the code)
    idx_list : int
        position of the elevation column in the lookup file
    phi_input : float
        smoothing parameter for the thin plate spline; if 0, no smoothing;
        default is None (it is calculated)
    calc_phi : bool
        whether to calculate phi in the function; if True, phi_input can be None
    kernels : dictionary
        the kernels for each weather variable for gaussian process regression

    Returns
    ----------
    tuple
        the name of the selected interpolation type (str), the surface
        interpolated with that type (ndarray), and the bounds of the
        interpolated surface (maxmin)
    '''
    MAE_dict = {}
    for method in interpolation_types:
        if method not in ['IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4',
                          'TPS', 'GPR', 'RF']:
            print('The method %s is not currently a supported interpolation type.'
                  % (method))
            sys.exit()
        else:
            if method == 'IDW2':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 2, rep, False)
                MAE_dict[method] = MAE
            if method == 'IDW3':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 3, rep, False)
                MAE_dict[method] = MAE
            if method == 'IDW4':
                MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                        shapefile, 4, rep, False)
                MAE_dict[method] = MAE
            if method == 'IDEW2':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 2, rep)
                MAE_dict[method] = MAE
            if method == 'IDEW3':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 3, rep)
                MAE_dict[method] = MAE
            if method == 'IDEW4':
                MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                              shapefile, file_path_elev,
                                              elev_array, idx_list, 4, rep)
                MAE_dict[method] = MAE
            if method == 'TPS':
                MAE = tps.shuffle_split_tps(loc_dictionary, cvar_dictionary,
                                            shapefile, 10)
                MAE_dict[method] = MAE
            if method == 'RF':
                MAE = rf.shuffle_split_rf(loc_dictionary, cvar_dictionary,
                                          shapefile, file_path_elev,
                                          elev_array, idx_list, 10)
                MAE_dict[method] = MAE
            if method == 'GPR':
                MAE = gpr.shuffle_split_gpr(loc_dictionary, cvar_dictionary,
                                            shapefile, file_path_elev,
                                            elev_array, idx_list,
                                            kernels[var_name], 10)
                MAE_dict[method] = MAE

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # Interpolate the surface using the best method
    if best_method == 'IDW2':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     2, False)  # Expand_area is not supported yet
    if best_method == 'IDW3':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     3, False)  # Expand_area is not supported yet
    if best_method == 'IDW4':
        choix_surf, maxmin = idw.IDW(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     4, False)  # Expand_area is not supported yet
    if best_method == 'IDEW2':
        choix_surf, maxmin, elev_array = idew.IDEW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list,
            2)  # Expand_area is not supported yet
    if best_method == 'IDEW3':
        choix_surf, maxmin, elev_array = idew.IDEW(loc_dictionary,
                                                   cvar_dictionary, input_date,
                                                   'Variable', shapefile,
                                                   False, file_path_elev,
                                                   idx_list, 3)
    if best_method == 'IDEW4':
        choix_surf, maxmin, elev_array = idew.IDEW(loc_dictionary,
                                                   cvar_dictionary, input_date,
                                                   'Variable', shapefile,
                                                   False, file_path_elev,
                                                   idx_list, 4)
    if best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi)
    if best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list, False)
    if best_method == 'GPR':
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list, False,
            kernels[var_name], None, None, False, False)

    return best_method, choix_surf, maxmin
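# Minimal, self-contained illustration of the selection rule used in
# run_comparison: each candidate method maps to its cross-validated MAE, and
# the method with the lowest MAE wins. The error values are invented for the
# example.
example_MAE = {'IDW2': 1.42, 'IDW4': 1.57, 'TPS': 1.31, 'RF': 1.38}
best = min(example_MAE, key=example_MAE.get)
print(best)  # -> TPS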
def execute_sequential_calc(file_path_hourly, file_path_daily,
                            file_path_daily_csv, loc_dictionary_hourly,
                            loc_dictionary_daily, date_dictionary, year,
                            interpolation_types, rep, file_path_elev,
                            idx_list, save_path, shapefile, shapefile2,
                            phi_input=None, calc_phi=True,
                            kernels={'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
                                     'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
                                     'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
                                     'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Execute the DC, DMC, FFMC sequential calculations

    Parameters
    ----------
    file_path_hourly : string
        path to hourly feather files
    file_path_daily : string
        path to daily feather files
    file_path_daily_csv : string
        path to daily csv files
    loc_dictionary_hourly : dictionary
        dictionary of station locations (hourly)
    loc_dictionary_daily : dictionary
        dictionary of station locations (daily)
    date_dictionary : dictionary
        date lookup dictionary for the daily weather data
    year : string
        year to execute sequential calculations for
    interpolation_types : list
        list of interpolation types to consider
    rep : int
        number of replications to run
    file_path_elev : string
        path to the elevation lookup file
    idx_list : int
        position of the elevation column in the lookup file
    save_path : string
        path on disk to save the output files to
    shapefile : string
        path to study area shapefile (ON + QC)
    shapefile2 : string
        path to boreal shapefile
    phi_input : float
        smoothing parameter for the thin plate spline; if 0, no smoothing;
        default is None (it is calculated)
    calc_phi : bool
        whether to calculate phi in the function; if True, phi_input can be None
    kernels : dictionary
        the kernels for each weather variable for gaussian process regression

    Returns
    ----------
    list
        list of DC arrays, one for each day in the fire season
    '''
    # Fire season start and end dates
    start = time.time()
    # Get two things: start date for each station and the lat lon of the station
    start_dict, latlon_station = fwi.start_date_calendar_csv(
        file_path_daily_csv, year)
    end_dict, latlon_station = fwi.end_date_calendar_csv(
        file_path_daily_csv, year, 'oct')  # start searching from Oct 1

    daysurface, maxmin = idw.IDW(latlon_station, start_dict, year,
                                 '# Days Since March 1', shapefile, False, 3,
                                 False)  # Interpolate the start date, IDW3
    endsurface, maxmin = idw.IDW(latlon_station, end_dict, year,
                                 '# Days Since Oct 1', shapefile, False, 3,
                                 False)  # Interpolate the end date

    # For now, no overwinter procedure
    end_dc_vals = np.zeros(endsurface.shape)

    end = time.time()
    time_elapsed = (end - start) / 60
    print('Finished getting season start & end dates, it took %s minutes'
          % (time_elapsed))

    # Initialize the input elev_array (which is stable)
    placeholder_surf, maxmin, elev_array = idew.IDEW(loc_dictionary_hourly,
                                                     end_dict, 'placeholder',
                                                     'Variable', shapefile,
                                                     False, file_path_elev,
                                                     idx_list, 2)

    # Get the dates in the fire season; overall, the surfaces will take care of masking
    # Get the start date (if it's too early everything will be masked out, so any day before April works)
    sdate = pd.to_datetime(year + '-03-01').date()
    # End date, for right now it's Dec 31
    edate = pd.to_datetime(year + '-12-31').date()
    # Get the dates for all the potential days in the season
    dates = list(pd.date_range(sdate, edate - timedelta(days=1), freq='d'))

    dc_list = []
    count = 0
    for input_date in dates:
        print(input_date)
        gc.collect()
        # Get the weather dictionaries
        start = time.time()
        temp = GD.get_noon_temp(str(input_date)[:-3], file_path_hourly)
        rh = GD.get_relative_humidity(str(input_date)[:-3], file_path_hourly)
        wind = GD.get_wind_speed(str(input_date)[:-3], file_path_hourly)
        pcp = GD.get_pcp(str(input_date)[0:10], file_path_daily,
                         date_dictionary)
        end = time.time()
        time_elapsed = end - start
        print('Finished getting weather dictionaries, it took %s seconds'
              % (time_elapsed))

        start = time.time()
        best_interp_temp, choice_surf_temp, maxmin = run_comparison(
            'temp', input_date, interpolation_types, rep,
            loc_dictionary_hourly, temp, file_path_elev, elev_array, idx_list)
        best_interp_rh, choice_surf_rh, maxmin = run_comparison(
            'rh', input_date, interpolation_types, rep, loc_dictionary_hourly,
            rh, file_path_elev, elev_array, idx_list)
        best_interp_wind, choice_surf_wind, maxmin = run_comparison(
            'wind', input_date, interpolation_types, rep,
            loc_dictionary_hourly, wind, file_path_elev, elev_array, idx_list)
        best_interp_pcp, choice_surf_pcp, maxmin = run_comparison(
            'pcp', input_date, interpolation_types, rep, loc_dictionary_daily,
            pcp, file_path_elev, elev_array, idx_list)
        end = time.time()
        time_elapsed = end - start
        print('Finished getting best methods & surfaces, it took %s seconds'
              % (time_elapsed))

        # Get date index information
        year = str(input_date)[0:4]
        index = dates.index(input_date)
        dat = str(input_date)
        day_index = fwi.get_date_index(year, dat, 3)
        eDay_index = fwi.get_date_index(year, dat, 10)

        start = time.time()
        mask1 = fwi.make_start_date_mask(day_index, daysurface)
        if eDay_index < 0:
            # in the case that the index is before Oct 1
            endMask = np.ones(endsurface.shape)
        else:
            endMask = fwi.make_end_date_mask(eDay_index, endsurface)

        if count > 0:
            # The last one appended is yesterday's value, but there's a lag
            # because no update was made when count was 0, so use count - 1
            dc_array = dc_list[count - 1]
            index = count - 1
            dc = fwi.DC(input_date, choice_surf_pcp, choice_surf_rh,
                        choice_surf_temp, choice_surf_wind, maxmin, dc_array,
                        index, False, shapefile, mask1, endMask, None, False)
            dc_list.append(dc)
        else:
            rain_shape = choice_surf_pcp.shape
            # Merge with the other overwinter array once it's calculated
            dc_initialize = np.zeros(rain_shape) + 15
            dc_yesterday1 = dc_initialize * mask1
            dc_list.append(dc_yesterday1)  # placeholder

        end = time.time()
        time_elapsed = end - start
        print('Finished getting DC for date in stream, it took %s seconds'
              % (time_elapsed))
        count += 1

    # Prep to serialize
    dc_list = [x.tolist() for x in dc_list]
    with open(save_path + year + '_DC_auto_select.json', 'w') as fp:
        json.dump(dc_list, fp)
    with open(save_path + year + '_DC_auto_select.json', 'r') as fp:
        dc_list = json.load(fp)

    # Convert to np array for plotting
    dc_list = [np.array(x) for x in dc_list]
    fwi.plot_july(dc_list, maxmin, year, 'DC', shapefile, shapefile2)
    fwi.plot_june(dc_list, maxmin, year, 'DC', shapefile, shapefile2)

    return dc_list
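# Toy, self-contained sketch of the sequential update pattern used above,
# with invented numbers: day 0 only seeds the DC grid with the startup value
# of 15 under the season mask, and each later day reads the previous day's
# grid (the count - 1 lag). fake_update stands in for fwi.DC and is NOT the
# real DC equation.
import numpy as np

mask = np.array([[1.0, 1.0], [0.0, 1.0]])  # 0 = outside the fire season


def fake_update(yesterday, rain):
    # invented arithmetic standing in for the real drought-code update
    return (yesterday + 2.0 - rain) * mask


toy_dc_list = []
for day, rain in enumerate([0.0, 1.0, 5.0]):
    if day == 0:
        toy_dc_list.append((np.zeros_like(mask) + 15) * mask)  # startup value
    else:
        toy_dc_list.append(fake_update(toy_dc_list[day - 1], rain))
print(toy_dc_list[-1])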
def stack_and_average(year1, year2, file_path_daily, file_path_hourly,
                      shapefile, file_path_elev, idx_list, method):
    '''Get the fire season duration for every year between the two input
    years (inclusive) and average them. Output the average array.

    Parameters
    ----------
    year1 : int
        first year taken into account
    year2 : int
        last year taken into account
    file_path_daily : string
        path to the daily weather csv files from Environment & Climate Change Canada
    file_path_hourly : string
        path to the hourly feather files
    shapefile : string
        path to the study area shapefile
    file_path_elev : string
        path to the elevation lookup file
    idx_list : list
        column index of elevation information in the lookup file
    method : string
        type of interpolation to use to create the yearly arrays, one of:
        'IDW2', 'IDW3', 'IDW4', 'TPSS', 'RF'

    Returns
    ----------
    ndarray
        average of each pixel of all the years considered, in array format
    '''
    list_of_arrays = []
    for year in range(int(year1), int(year2) + 1):
        print('Processing...' + str(year))
        days_dict, latlon_station = fwi.start_date_calendar_csv(
            file_path_daily, str(year))
        end_dict, latlon_station2 = fwi.end_date_calendar_csv(
            file_path_daily, str(year), 'oct')
        if year >= 2020:
            hourly_dict, latlon_stationH = fwi.start_date_add_hourly(
                file_path_hourly, str(year))
            hourly_end, latlon_stationE = fwi.end_date_add_hourly(
                file_path_hourly, str(year))
            days_dict = combine_stations(days_dict, hourly_dict)
            latlon_station = combine_stations(latlon_station, latlon_stationH)
            end_dict = combine_stations(end_dict, hourly_end)
            latlon_station2 = combine_stations(latlon_station2,
                                               latlon_stationE)

        if method == 'IDW2':
            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 2, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict,
                                          str(year), 'End', shapefile, False,
                                          2, True)
        elif method == 'IDW3':
            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 3, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict,
                                          str(year), 'End', shapefile, False,
                                          3, True)
        elif method == 'IDW4':
            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 4, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict,
                                          str(year), 'End', shapefile, False,
                                          4, True)
        elif method == 'TPSS':
            num_stationsS = int(len(days_dict.keys()))
            phi_inputS = int(num_stationsS) - math.sqrt(2 * num_stationsS)
            num_stationsE = int(len(end_dict.keys()))
            phi_inputE = int(num_stationsE) - math.sqrt(2 * num_stationsE)
            start_surface, maxmin = tps.TPS(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, phi_inputS, True, True)
            end_surface, maxmin = tps.TPS(latlon_station2, end_dict,
                                          str(year), 'End', shapefile, False,
                                          phi_inputE, True, True)
        elif method == 'RF':
            start_surface, maxmin = rf.random_forest_interpolator(
                latlon_station, days_dict, str(year), 'Start', shapefile,
                False, file_path_elev, idx_list, True)
            end_surface, maxmin = rf.random_forest_interpolator(
                latlon_station2, end_dict, str(year), 'End', shapefile, False,
                file_path_elev, idx_list, True)
        else:
            print('Either that method does not exist or there is no support '
                  'for it. You can use IDW2-4, TPSS, or RF')
            sys.exit()

        dur_array = calc_season_duration(start_surface, end_surface, year)
        list_of_arrays.append(dur_array)

    voxels = np.dstack(list_of_arrays)  # stack arrays based on depth
    averaged_voxels = np.array([[np.mean(x) for x in group]
                                for group in voxels])
    return averaged_voxels
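# Self-contained illustration of the averaging step in stack_and_average,
# with invented duration values: np.dstack piles the yearly 2-D arrays along
# a third axis, and the per-pixel mean over that axis is equivalent to the
# nested comprehension used above (np.mean(voxels, axis=2) gives the same
# result).
import numpy as np

year_a = np.array([[100., 120.], [110., 130.]])
year_b = np.array([[104., 118.], [112., 126.]])
voxels = np.dstack([year_a, year_b])  # shape (2, 2, 2): row, col, year
averaged = np.array([[np.mean(x) for x in group] for group in voxels])
assert np.allclose(averaged, np.mean(voxels, axis=2))
print(averaged)  # [[102. 119.] [111. 128.]]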
yi = ds_gem.gridlat_0
Ei = ds_gem.HGT_P0_L1_GST0

# Initialize dataArray
da_list = []
test_plots = False

# Loop through time
for t in ds_pts.Time_UTC:  # .sel(Time_UTC=slice('2014-11-28T00:00:00', '2014-11-29T01:00:00')):
    # Get current time
    cval = ds_pts.sel(Time_UTC=t)
    print(t.values)

    # Set up IDW
    w = idw.IDW(x, y, xi, yi, mz=E, GridZ=Ei, power=2)

    # Check we have some observations
    if cval.notnull().sum() == 0:
        print(t)
        raise ValueError('No stations with data on this time step found')

    # De-trend (wrt Elevation)
    cval_grid = w.detrendedIDW(cval.values, 0, zeros=None)
    cval_grid = cval_grid.where(cval_grid >= 0).fillna(0)
    cval_grid = cval_grid.where(Ei.notnull())  # Replace original missing cells

    # Add time stamp
    cval_grid['Time_UTC'] = t

    # Store interpolated grid
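    # Hedged completion sketch (the original excerpt ends at the comment
    # above; these lines are an assumption, not the original code): append
    # the grid to da_list, then combine the per-time-step grids along the
    # time axis after the loop. Assumes `import xarray as xr` at the top of
    # the script.
    da_list.append(cval_grid)

gridded = xr.concat(da_list, dim='Time_UTC')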