def find_model(self, model_type, lead, init_time):
    model_dir = self.p.getstr('config', model_type + '_INPUT_DIR')
    # max_forecast = self.p.getint('config', model_type+'_MAX_FORECAST')
    forecasts = model_type + '_FORECASTS'
    max_forecast = util.getlistint(self.p.getstr('config', forecasts))[-1]
    init_interval = self.p.getint('config', model_type + '_INIT_INTERVAL')
    lead_check = lead
    time_check = init_time
    found = False
    # step back through earlier init times (adding the same amount to the
    # lead so the valid time is unchanged) until a file exists or the
    # maximum forecast lead is exceeded
    while lead_check <= max_forecast:
        native_template = self.p.getraw('filename_templates',
                                        model_type + '_NATIVE_TEMPLATE')
        model_ss = sts.StringSub(self.logger, native_template,
                                 init=time_check,
                                 lead=str(lead_check).zfill(2))
        model_file = model_ss.doStringSub()
        self.logger.debug("model file: " + model_file)
        model_path = os.path.join(model_dir, model_file)
        if os.path.exists(model_path):
            found = True
            break
        time_check = util.shift_time(time_check, -init_interval)
        lead_check = lead_check + init_interval
    if found:
        return model_path
    return ''
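# A hypothetical call illustrating the backward search above, assuming a
# wrapper instance `w`, METplus-style "%Y%m%d%H" time strings, and a
# 6-hour MODEL_INIT_INTERVAL: if the F06 file from the 2019020106 cycle
# is missing, the loop next looks for F12 from 2019020100 (the same valid
# time), and so on until the last entry of MODEL_FORECASTS is passed.
#
#   path = w.find_model('MODEL', 6, '2019020106')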
def getValidTime(self):
    if self.valid_time != -1:
        return self.valid_time
    if self.init_time != -1 and self.lead != -1:
        return util.shift_time(self.init_time, self.lead)
    print("ERROR: Could not compute valid_time")
    exit(1)
def find_closest_before(self, dir, time, template):
    # search today's directory first; fall back to the day before
    day_before = util.shift_time(time, -24)
    yesterday_file = self.search_day(dir, time, str(day_before)[0:8],
                                     template)
    today_file = self.search_day(dir, time, str(time)[0:8], template)
    if today_file == "":
        return yesterday_file
    return today_file
def get_lowest_forecast_at_valid(self, valid_time, dtype):
    day_before = util.shift_time(valid_time, -24)
    input_template = self.p.getraw('filename_templates',
                                   dtype + '_INPUT_TEMPLATE')
    # get all files in yesterday's directory, then get the valid time
    # from init/fcst
    # NOTE: This will only apply to forecasts up to 48 hours.
    # If more is needed, add a parameter to specify the number of
    # days to look back.
    out_file = self.getLastFile(valid_time, day_before, input_template)
    out_file2 = self.getLastFile(valid_time, valid_time, input_template)
    if out_file2 == "":
        return out_file
    return out_file2
def getLastFile(self, valid_time, search_time, template):
    out_file = ""
    files = sorted(glob.glob("{:s}/{:s}/*".format(self.input_dir,
                                                  str(search_time)[0:8])))
    for fpath in files:
        f = os.path.join(str(search_time)[0:8], os.path.basename(fpath))
        se = sts.StringExtract(self.logger, template, f)
        se.parseTemplate()
        fcst = se.leadHour
        if fcst == -1:
            print("ERROR: Could not pull forecast lead from " + f)
            exit(1)
        init = se.getInitTime("%Y%m%d%H")
        v = util.shift_time(init, fcst)
        if v == valid_time:
            out_file = fpath
    # files are sorted, so this is the match with the latest init time,
    # i.e. the lowest forecast lead at the requested valid time
    return out_file
def find_model(self, lead, init_time, level, cur_model):
    model_dir = self.p.getstr('config', 'FCST_GRID_STAT_INPUT_DIR')
    #max_forecast = self.p.getint('config', 'FCST_MAX_FORECAST')
    max_forecast = cur_model[2]
    init_interval = self.p.getint('config', 'FCST_INIT_INTERVAL')
    lead_check = lead
    time_check = init_time
    found = False
    while lead_check <= max_forecast:
        #model_template = self.p.getraw('filename_templates',
        #                               'FCST_GRID_STAT_INPUT_TEMPLATE')
        model_template = cur_model[0]
        # split by - to handle a level that is a range, such as 0-10
        model_ss = sts.StringSub(self.logger, model_template,
                                 init=time_check,
                                 lead=str(lead_check).zfill(2),
                                 level=str(level.split('-')[0]).zfill(2))
        model_file = model_ss.doStringSub()
        model_path = os.path.join(model_dir, model_file)
        if os.path.exists(model_path):
            found = True
            break
        elif os.path.exists(model_path + ".gz"):
            # decompress next to the archive; model_path is already the
            # path without the .gz suffix, so use it from here on
            with gzip.open(model_path + ".gz", 'rb') as infile:
                with open(model_path, 'wb') as outfile:
                    outfile.write(infile.read())
            found = True
            break
        time_check = util.shift_time(time_check, -init_interval)
        lead_check = lead_check + init_interval
    if found:
        return model_path
    return ''
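# A possible refinement of the gzip branch above (an addition, not in the
# original): stream the decompression with shutil.copyfileobj instead of
# reading the whole archive into memory, which matters for large files.
#
#   import shutil
#   with gzip.open(model_path + ".gz", 'rb') as infile, \
#           open(model_path, 'wb') as outfile:
#       shutil.copyfileobj(infile, outfile)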
def getInitTime(self):
    if self.init_time != -1:
        return self.init_time
    if self.valid_time != -1 and self.lead != -1:
        return util.shift_time(self.valid_time, -self.lead)
    return -1
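# A minimal illustration (not part of the original code) of the
# init/valid/lead arithmetic that getInitTime/getValidTime above rely
# on, assuming util.shift_time takes a "%Y%m%d%H" string and an offset
# in hours, as it does everywhere else in this file:
#
#   util.shift_time("2019020100", 6)   # valid = init + lead -> "2019020106"
#   util.shift_time("2019020106", -6)  # init = valid - lead -> "2019020100"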
obs_dir = "/raid/student/twiest/Scorecard/ST4"
outdir = "/raid/efp/se2019/ftp/dtc/obs/pcp_combine/"

# Create a list of valid times to search
loop_time = calendar.timegm(time.strptime(sdate, "%Y%m%d%H"))
end_time = calendar.timegm(time.strptime(edate, "%Y%m%d%H"))
while loop_time <= end_time:
    # Create a list of files to loop over
    valid_time = time.strftime("%Y%m%d%H", time.gmtime(loop_time))
    valid_hr = int(valid_time[8:10])

    # Create 3 hour accumulation with pcp_combine
    fadd = ""
    for t in range(3):
        fadd = fadd + obs_dir + "/ST4." + \
               util.shift_time(valid_time + "00", -t)[0:10] + ".01h 1 "
    outfile3 = outdir + "/ST4." + valid_time + "_A03.nc"
    os.system("pcp_combine -add " + fadd + outfile3)

    # Create 6 hour accumulation at synoptic hours
    if (valid_hr % 6) == 0:
        fadd6 = ""
        for s in range(6):
            fadd6 = fadd6 + obs_dir + "/ST4." + \
                    util.shift_time(valid_time + "00", -s)[0:10] + ".01h 1 "
        outfile6 = outdir + "/ST4." + valid_time + "_A06.nc"
        os.system("pcp_combine -add " + fadd6 + outfile6)

    loop_time += int(inc)

fadd24 = ""
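# A sketch (an addition, not from the original script) of issuing the
# same pcp_combine command through subprocess instead of os.system, so a
# non-zero exit status raises instead of being silently ignored. The
# helper name run_pcp_combine_add is hypothetical; it assumes pcp_combine
# is on the PATH and takes the already-built fadd/outfile strings above.
import shlex
import subprocess

def run_pcp_combine_add(input_args, outfile):
    # e.g. input_args = ".../ST4.2019020100.01h 1 ", outfile = ".../ST4..._A03.nc"
    cmd = "pcp_combine -add " + input_args + outfile
    # check=True raises CalledProcessError if pcp_combine fails
    subprocess.run(shlex.split(cmd), check=True)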
def get_accumulation(self, valid_time, accum, ob_type, is_forecast=False):
    # TODO: pass in template (input/native) so this isn't assumed
    file_template = self.p.getraw('filename_templates',
                                  ob_type + "_INPUT_TEMPLATE")
    if self.input_dir == "":
        self.logger.error(self.app_name +
                          ": Must set data dir to run get_accumulation")
        exit(1)
    self.add_arg("-add")
    if self.p.getbool('config', ob_type + '_IS_DAILY_FILE') is True:
        # loop accum times
        data_interval = self.p.getint('config',
                                      ob_type + '_DATA_INTERVAL') * 3600
        for i in range(0, accum, data_interval):
            search_time = util.shift_time(valid_time, -i)
            # find closest file before time
            f = self.find_closest_before(self.input_dir, search_time,
                                         file_template)
            if f == "":
                continue
            # build level info string
            file_time = datetime.datetime.strptime(f[-18:-8], "%Y%m%d%H")
            v_time = datetime.datetime.strptime(search_time, "%Y%m%d%H")
            diff = v_time - file_time
            lead = int((diff.days * 24) / (data_interval / 3600))
            lead += int((v_time - file_time).seconds / data_interval) - 1
            fname = self.p.getstr('config',
                                  ob_type + '_' + str(accum) + '_FIELD_NAME')
            addon = "'name=\"" + fname + "\"; level=\"(" + \
                    str(lead) + ",*,*)\";'"
            self.add_input_file(f, addon)
    else:  # not a daily file
        # if a field that corresponds to the search accumulation exists
        # in the files, check the file with the valid time before moving
        # backwards in time
        if self.p.has_option('config', ob_type + '_' + str(accum) +
                             '_FIELD_NAME') and ob_type != "NATIONAL_BLEND":
            fSts = sts.StringSub(self.logger, file_template,
                                 valid=valid_time,
                                 accum=str(accum).zfill(2))
            # TODO: This assumes max 99 accumulation.
            # zfill to 3 if above that is possible
            search_file = os.path.join(self.input_dir, fSts.doStringSub())
            if os.path.exists(search_file):
                data_type = self.p.getstr('config',
                                          ob_type + '_NATIVE_DATA_TYPE')
                if data_type == "GRIB":
                    addon = accum
                elif data_type == "NETCDF":
                    fname = self.p.getstr('config', ob_type + '_' +
                                          str(accum) + '_FIELD_NAME')
                    addon = "'name=\"" + fname + "\"; level=\"(0,*,*)\";'"
                self.add_input_file(search_file, addon)
                self.set_output_dir(self.outdir)
                return
        start_time = valid_time
        last_time = util.shift_time(valid_time, -(int(accum) - 1))
        total_accum = int(accum)
        # search_accum = total_accum
        search_accum = self.p.getint('config', ob_type + '_ACCUM')
        # loop backwards in time until you have a full set of accum
        while last_time <= start_time:
            if is_forecast:
                f = self.get_lowest_forecast_at_valid(start_time, ob_type)
                if f == "":
                    break
                # TODO: assumes 1hr accum (6 for NB) in these files for now
                if ob_type == "NATIONAL_BLEND":
                    ob_str = self.p.getstr('config', ob_type + '_' +
                                           str(6) + '_FIELD_NAME')
                    addon = "'name=\"" + ob_str + "\"; level=\"(0,*,*)\";'"
                else:
                    ob_str = self.p.getstr('config', ob_type + '_' +
                                           str(1) + '_FIELD_NAME')
                    addon = "'name=\"" + ob_str + "\"; level=\"(0,*,*)\";'"
                self.add_input_file(f, addon)
                start_time = util.shift_time(start_time, -1)
                search_accum -= 1
            else:  # not looking for forecast files
                # get all files of valid_time (all accums)
                # files = sorted(glob.glob("{:s}/{:s}/*{:s}*"
                #                          .format(self.input_dir,
                #                                  start_time[0:8],
                #                                  start_time)))
                self.logger.debug("INPUT DIR IS: " + self.input_dir +
                                  " and START TIME IS: " + start_time)
                files = sorted(glob.glob("{:s}/*{:s}*"
                                         .format(self.input_dir,
                                                 start_time)))
                # look for the biggest accum that fits the search
                while search_accum > 0:
                    fSts = sts.StringSub(self.logger, file_template,
                                         valid=start_time,
                                         accum=str(search_accum).zfill(2))
                    search_file = os.path.join(self.input_dir,
                                               fSts.doStringSub())
                    f = None
                    for file in files:
                        if file == search_file:
                            f = file
                            break
                    # if found a file, add it to input list with info
                    if f is not None:
                        addon = ""
                        data_type = self.p.getstr('config', ob_type +
                                                  '_NATIVE_DATA_TYPE')
                        if data_type == "GRIB":
                            addon = search_accum
                        elif data_type == "NETCDF":
                            ob_str = self.p.getstr('config', ob_type + '_' +
                                                   str(search_accum) +
                                                   '_FIELD_NAME')
                            addon = "'name=\"" + ob_str + \
                                    "\"; level=\"(0,*,*)\";'"
                        self.add_input_file(f, addon)
                        start_time = util.shift_time(start_time + "00",
                                                     -search_accum)[0:10]
                        total_accum -= search_accum
                        # search_accum = total_accum
                        break
                    search_accum -= 1
            if total_accum == 0:
                break
            if search_accum == 0:
                self.logger.warning(self.app_name + ": Could not find "
                                    "files to compute accumulation for " +
                                    ob_type)
                return None
    self.set_output_dir(self.outdir)
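# Worked example (hypothetical config values) of the fallback loop above:
# with accum=6, <OB_TYPE>_ACCUM=6, and only 01h files on disk,
# search_accum counts down from 6 until the 01h file valid at start_time
# matches; start_time then steps back one hour at a time, so six 1-hour
# inputs are added and the generated pcp_combine -add command produces
# the 6-hour total.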
def main():
    logger = logging.getLogger('run_example')
    init_time = 0
    start_time = 0
    end_time = 0
    time_interval = 1

    short_opts = "c:r:h"
    long_opts = ["config=", "help", "runtime="]
    # All command line input, get options and arguments
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        print(str(err))
        usage('SCRIPT IS EXITING DUE TO UNRECOGNIZED COMMAND LINE OPTION')
    for k, v in opts:
        if k in ('-c', '--config'):
            # adds the conf file to the list of arguments
            args.append(config_launcher.set_conf_file_path(v))
        elif k in ('-h', '--help'):
            usage()
            exit()
        elif k in ('-r', '--runtime'):
            start_time = v
            end_time = v
        else:
            assert False, "UNHANDLED OPTION"
    if not args:
        args = None
    (parm, infiles, moreopt) = config_launcher.parse_launch_args(args, usage,
                                                                 None, logger)
    p = config_launcher.launch(infiles, moreopt)
    logger = util.get_logger(p)
    logger.setLevel(logging.DEBUG)

    if start_time == 0:
        start_time = p.getstr('config', 'START_TIME')
        end_time = p.getstr('config', 'END_TIME')
    time_interval = p.getstr('config', 'TIME_INTERVAL')

    # Get the list of processes to call
    process_list = util.getlist(p.getstr('config', 'PROCESS_LIST'))
    model_type = p.getstr('config', 'MODEL_TYPE')
    fcst_vars = util.getlist(p.getstr('config', 'FCST_VARS'))
    lead_seq = util.getlistint(p.getstr('config', 'LEAD_SEQ'))

    init_time = start_time
    while init_time <= end_time:
        print("")
        print("****************************************")
        print("* RUNNING MET+")
        print("* EVALUATING " + model_type + " at init time: " + init_time)
        print("****************************************")
        logger.info("****************************************")
        logger.info("* RUNNING MET+")
        logger.info("* EVALUATING " + model_type + " at init time: " +
                    init_time)
        logger.info("****************************************")
        for lead in lead_seq:
            for fcst_var in fcst_vars:
                # loop over models to compare
                accums = util.getlist(p.getstr('config', fcst_var + "_ACCUM"))
                ob_types = util.getlist(p.getstr('config',
                                                 fcst_var + "_OBTYPE"))
                for accum in accums:
                    for ob_type in ob_types:
                        if lead < int(accum):
                            continue
                        obs_var = p.getstr('config', ob_type + "_VAR")
                        logger.info("")
                        logger.info("")
                        logger.info("For " + init_time + " F" + str(lead) +
                                    ", processing " + model_type + "_" +
                                    fcst_var + "_" + accum + " vs " +
                                    ob_type + " " + obs_var + "_" + accum)
                        valid_time = util.shift_time(init_time, lead)
                        data_interval = p.getint('config',
                                                 ob_type + '_DATA_INTERVAL')
                        if int(valid_time[8:10]) % data_interval != 0:
                            logger.warning("No observation for valid time: " +
                                           valid_time + ". Skipping...")
                            continue
                        for process in process_list:
                            if process == "pcp_combine":
                                run_pcp = CG_pcp_combine(p, logger)
                                run_pcp.run_at_time(valid_time, accum,
                                                    ob_type, fcst_var)
                            elif process == "regrid_data_plane":
                                run_regrid = CG_regrid_data_plane(p, logger)
                                run_regrid.run_at_time(valid_time, accum,
                                                       ob_type)
                            elif process == "grid_stat":
                                run_grid_stat = CG_grid_stat(p, logger)
                                run_grid_stat.run_at_time(init_time, lead,
                                                          accum, ob_type,
                                                          fcst_var)
                            else:
                                print("ERROR: Invalid process in "
                                      "process list")
                                exit(1)
        init_time = util.shift_time(init_time, int(time_interval))
    logger.info("END OF EXECUTION")
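# Hypothetical invocations, given the options parsed above (the script
# name is inferred from the logger name; the conf file is illustrative):
#
#   run_example.py -c my_config.conf
#   run_example.py -c my_config.conf -r 2019020100   # single run time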
def run_at_time_once(self, ti, v):
    valid_time = ti.getValidTime()
    init_time = ti.getInitTime()
    ensemble_stat_base_dir = self.p.getstr('config',
                                           'ENSEMBLE_STAT_OUT_DIR')
    #if self.p.getbool('config', 'LOOP_BY_INIT'):
    ensemble_stat_out_dir = os.path.join(ensemble_stat_base_dir,
                                         init_time, "ensemble_stat")
    #else:
    #    ensemble_stat_out_dir = os.path.join(ensemble_stat_base_dir,
    #                                         valid_time, "ensemble_stat")
    fcst_level = v.fcst_level
    fcst_level_type = ""
    if fcst_level[0].isalpha():
        fcst_level_type = fcst_level[0]
        fcst_level = fcst_level[1:]
    #obs_level = v.obs_level
    #obs_level_type = ""
    #if obs_level[0].isalpha():
    #    obs_level_type = obs_level[0]
    #    obs_level = obs_level[1:]
    model_type = self.p.getstr('config', 'MODEL_TYPE')
    #obs_dir = self.p.getstr('config', 'OBS_GRID_STAT_INPUT_DIR')
    #obs_template = os.path.expandvars(self.p.getraw('filename_templates',
    #                                  'OBS_GRID_STAT_INPUT_TEMPLATE'))
    model_dir = self.p.getstr('config', 'FCST_ENSEMBLE_STAT_INPUT_DIR')
    config_dir = self.p.getstr('config', 'CONFIG_DIR')
    ymd_v = valid_time[0:8]
    if not os.path.exists(ensemble_stat_out_dir):
        os.makedirs(ensemble_stat_out_dir)

    # get model to compare
    model_path = self.find_models(ti.lead, init_time, fcst_level)
    if model_path == "":
        self.logger.error("COULD NOT FIND FILE IN " + model_dir)
        return
    self.add_input_file(model_path)
    #if self.p.getbool('config', 'OBS_EXACT_VALID_TIME', True):
    #    obsSts = sts.StringSub(self.logger,
    #                           obs_template,
    #                           valid=valid_time,
    #                           init=init_time,
    #                           level=str(obs_level.split('-')[0]).zfill(2))
    #    obs_file = obsSts.doStringSub()
    #    obs_path = os.path.join(obs_dir, obs_file)
    #else:
    #    obs_path = self.find_obs(ti, v)
    ##print(model_path)
    ##print(obs_path)
    ##print(" ")
    #self.add_input_file(obs_path)
    self.set_param_file(self.p.getstr('config', 'ENSEMBLE_STAT_CONFIG'))
    self.set_output_dir(ensemble_stat_out_dir)
    self.set_input_file_num(self.p.getstr('config', "ENSEMBLE_FILE_NUM"))

    # set up environment variables for each grid_stat run
    # get fcst and obs thresh parameters
    # verify they are the same size
    mask_dir = self.p.getstr('config', 'MASK_DIR')
    mask_name = self.p.getstr('config', 'MASK_FILE')
    if float(valid_time[8:10]) < 12:
        # Use previous day's grid
        grid_date = datetime.datetime.strptime(
            util.shift_time(valid_time, -24),
            "%Y%m%d%H%M").strftime("%Y%m%d")
    else:
        grid_date = valid_time[0:8]
    if os.path.isfile(mask_dir + "/" + grid_date + "_" + mask_name + ".nc"):
        verif_grid = mask_dir + "/" + grid_date + "_" + mask_name + ".nc"
    else:
        from create_met_poly import create_mask
        mfiles = glob.glob(model_path)
        verif_grid = create_mask(grid_date, mfiles[0], mask_name)
    os.environ["MASK_DIR_IN"] = "/raid/efp/se2018/ftp/dtc/mask"

    fcst_str = "FCST_" + v.fcst_name + "_" + fcst_level + "_THRESH"
    #obs_str = "OBS_" + v.obs_name + "_" + obs_level + "_THRESH"
    fcst_cat_thresh = ""
    #obs_cat_thresh = ""
    fcst_threshs = []
    #obs_threshs = []
    if self.p.has_option('config', fcst_str):
        fcst_threshs = util.getlistfloat(self.p.getstr('config', fcst_str))
        fcst_cat_thresh = "cat_thresh=[ "
        for fcst_thresh in fcst_threshs:
            fcst_cat_thresh += "ge" + str(fcst_thresh) + ", "
        fcst_cat_thresh = fcst_cat_thresh[0:-2] + " ];"
    #if self.p.has_option('config', obs_str):
    #    obs_threshs = util.getlistfloat(self.p.getstr('config', obs_str))
    #    obs_cat_thresh = "cat_thresh=[ "
    #    for obs_thresh in obs_threshs:
    #        obs_cat_thresh += "ge" + str(obs_thresh) + ", "
    #    obs_cat_thresh = obs_cat_thresh[0:-2] + " ];"
    #if len(fcst_threshs) != len(obs_threshs):
    #    self.logger.error("run_example: Number of forecast and "
    #                      "observation thresholds must be the same")
    #    exit(1)

    # TODO: Allow NetCDF level with more than 2 dimensions i.e. (1,*,*)
    # TODO: Need to check data type for PROB fcst? non PROB obs?
    fcst_field = ""
    #obs_field = ""
    # TODO: change PROB mode to put all cat thresh values in 1 item
    if self.p.getbool('config', 'FCST_IS_PROB'):
        for fcst_thresh in fcst_threshs:
            fcst_field += "{ name=\"PROB\"; level=\"" + fcst_level_type + \
                          fcst_level.zfill(2) + "\"; prob={ name=\"" + \
                          v.fcst_name + \
                          "\"; thresh_lo=" + str(fcst_thresh) + "; } },"
        #for obs_thresh in obs_threshs:
        #    obs_field += "{ name=\"" + v.obs_name + "_" + \
        #                 obs_level.zfill(2) + \
        #                 "\"; level=\"(*,*)\"; cat_thresh=[ gt" + \
        #                 str(obs_thresh) + " ]; },"
    else:
        # data_type = self.p.getstr('config', 'OBS_NATIVE_DATA_TYPE')
        #obs_data_type = util.get_filetype(self.p, obs_path)
        model_data_type = util.get_filetype(self.p, model_path)
        #if obs_data_type == "NETCDF":
        #    obs_field += "{ name=\"" + v.obs_name + "_" + \
        #                 obs_level.zfill(2) + "\"; level=\"(*,*)\"; "
        #else:
        #    obs_field += "{ name=\"" + v.obs_name + \
        #                 "\"; level=\"[" + obs_level_type + \
        #                 obs_level.zfill(2) + "]\"; "
        if model_data_type == "NETCDF":
            fcst_field += "{ name=\"" + v.fcst_name + "_" + \
                          fcst_level.zfill(2) + "\"; level=\"(*,*)\"; "
        else:
            fcst_field += "{ name=\"" + v.fcst_name + \
                          "\"; level=\"[" + fcst_level_type + \
                          fcst_level.zfill(2) + "]\"; "
        fcst_field += fcst_cat_thresh + " },"
        #obs_field += "{ name=\"" + v.obs_name + "_" + \
        #             obs_level.zfill(2) + "\"; level=\"(*,*)\"; "
        #if self.p.has_option('config', "OBS_" + v.obs_name + "_" +
        #                     obs_level + "_CENSOR_VAL"):
        #    obs_censor_val = self.p.getraw('config', "OBS_" + v.obs_name +
        #                                   "_" + obs_level + "_CENSOR_VAL")
        #    obs_field += "obs_censor_val = " + obs_censor_val + " ;"
        #if self.p.has_option('config', "OBS_" + v.obs_name + "_" +
        #                     obs_level + "_CENSOR_THRESH"):
        #    obs_censor_thresh = self.p.getraw('config', "OBS_" +
        #                                      v.obs_name + "_" + obs_level +
        #                                      "_CENSOR_THRESH")
        #    obs_field += "obs_censor_thresh = " + obs_censor_thresh + " ;"
        #obs_field += obs_cat_thresh + " },"
    # remove last comma and } to be added back after extra options
    fcst_field = fcst_field[0:-2]
    #obs_field = obs_field[0:-2]
    fcst_field += v.fcst_extra + "}"
    #obs_field += v.obs_extra + "}"

    #ob_type = self.p.getstr('config', "OB_TYPE")
    self.add_env_var("MODEL", model_type)
    self.add_env_var("FCST_VAR", v.fcst_name)
    #self.add_env_var("OBS_VAR", v.obs_name)
    # TODO: Change ACCUM to LEVEL in GridStatConfig_MEAN/PROB and here
    self.add_env_var("ACCUM", v.fcst_level)
    #self.add_env_var("OBTYPE", ob_type)
    self.add_env_var("CONFIG_DIR", config_dir)
    self.add_env_var("FCST_FIELD", fcst_field)
    #self.add_env_var("OBS_FIELD", obs_field)
    self.add_env_var("MET_VALID_HHMM", valid_time[4:8])
    self.add_env_var("VERIF_MASK", verif_grid)

    self.logger.debug("")
    self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_item("MODEL")
    self.print_env_item("FCST_VAR")
    #self.print_env_item("OBS_VAR")
    self.print_env_item("ACCUM")
    #self.print_env_item("OBTYPE")
    self.print_env_item("CONFIG_DIR")
    self.print_env_item("FCST_FIELD")
    #self.print_env_item("OBS_FIELD")
    self.print_env_item("MET_VALID_HHMM")
    self.print_env_item("VERIF_MASK")
    self.logger.debug("")
    self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_copy(["MODEL", "FCST_VAR", "ACCUM", "CONFIG_DIR",
                         "FCST_FIELD", "MET_VALID_HHMM"])
    self.logger.debug("")

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("ensemble_stat could not generate command")
        return
    self.logger.info("")
    self.build()
    self.clear()