def create_and_set_output_dir(self, time_info):
    """! Builds the full output dir path with valid or init time
         Creates output directory if it doesn't already exist
         Args:
             @param time_info dictionary with time information
    """
    out_dir = self.c_dict['OUTPUT_DIR']

    # use output template if it is set
    # if output template is not set, do not add any extra directories to path
    out_template_name = '{}_OUTPUT_TEMPLATE'.format(self.app_name.upper())
    if self.config.has_option('filename_templates', out_template_name):
        template = self.config.getraw('filename_templates', out_template_name)
        # perform string substitution to get full path
        string_sub = sts.StringSub(self.logger, template, **time_info)
        extra_path = string_sub.do_string_sub()
        out_dir = os.path.join(out_dir, extra_path)

    # create full output dir if it doesn't already exist
    # exist_ok avoids a race if another process creates the directory
    # between an existence check and the makedirs call
    os.makedirs(out_dir, exist_ok=True)

    # set output dir for wrapper
    self.outdir = out_dir
def getLowestForecastFile(self, valid_time, dtype, template):
    """!Find the lowest forecast hour that corresponds to the valid time
        Args:
          @param valid_time valid time to search
          @param dtype data type (FCST or OBS) to get filename template
          @param template filename template to fill in for each lead time
          @rtype string
          @return Path to file with the lowest forecast hour, or None if
                  no file was found within the configured lead range"""
    # search for file with lowest forecast, then loop up until a valid
    # file is found or the maximum forecast lead is exceeded
    min_forecast = self.c_dict[dtype + '_MIN_FORECAST']
    max_forecast = self.c_dict[dtype + '_MAX_FORECAST']
    forecast_lead = min_forecast
    while forecast_lead <= max_forecast:
        input_dict = {}
        input_dict['valid'] = valid_time
        input_dict['lead_hours'] = forecast_lead
        time_info = time_util.ti_calculate(input_dict)
        fSts = sts.StringSub(self.logger, template, **time_info)
        search_file = os.path.join(self.input_dir,
                                   fSts.do_string_sub())
        search_file = util.preprocess_file(
            search_file,
            self.c_dict[dtype + '_INPUT_DATATYPE'],
            self.config)
        # identity comparison is the idiomatic None check (was != None)
        if search_file is not None:
            return search_file
        forecast_lead += 1
    return None
def find_models(self, lead, init_time, level):
    """!Build the ensemble model file path for a lead/init time and level.
        Args:
          @param lead forecast lead time
          @param init_time initialization time string
          @param level vertical level, possibly a range such as 0-10
        @rtype string
        @return path (possibly containing wildcards) if at least one
                matching file exists on disk, otherwise empty string"""
    model_dir = self.p.getstr('config', 'FCST_ENSEMBLE_STAT_INPUT_DIR')
    model_template = os.path.expandvars(
        self.p.getraw('filename_templates',
                      'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE'))
    # split by - to handle a level that is a range, such as 0-10
    model_ss = sts.StringSub(self.logger, model_template,
                             init=init_time,
                             lead=str(lead).zfill(2),
                             level=str(level.split('-')[0]).zfill(2))
    model_file = model_ss.doStringSub()
    model_path = os.path.join(model_dir, model_file)
    # pattern may contain wildcards; only return it if something matches
    if glob.glob(model_path):
        return model_path
    return ''
def find_model(self, model_type, lead, init_time):
    """!Search backwards through init times for a model file, increasing
        the forecast lead to keep the same valid time.
        Args:
          @param model_type data type prefix used to look up config values
          @param lead starting forecast lead
          @param init_time starting initialization time
        @rtype string
        @return path to the model file, or empty string if not found"""
    model_dir = self.p.getstr('config', model_type + '_INPUT_DIR')
    forecasts = model_type + '_FORECASTS'
    # last entry of the forecast list is the maximum lead to consider
    max_forecast = util.getlistint(self.p.getstr('config', forecasts))[-1]
    init_interval = self.p.getint('config', model_type + '_INIT_INTERVAL')
    lead_check = lead
    time_check = init_time
    found = False
    while lead_check <= max_forecast:
        native_template = self.p.getraw('filename_templates',
                                        model_type + '_NATIVE_TEMPLATE')
        model_ss = sts.StringSub(self.logger, native_template,
                                 init=time_check,
                                 lead=str(lead_check).zfill(2))
        model_file = model_ss.doStringSub()
        # use the logger instead of print so output lands in the log files
        self.logger.debug("model file: " + model_file)
        model_path = os.path.join(model_dir, model_file)
        if os.path.exists(model_path):
            found = True
            break
        # try the previous init time with a correspondingly longer lead
        time_check = util.shift_time(time_check, -init_interval)
        lead_check = lead_check + init_interval
    if found:
        return model_path
    else:
        return ''
def run_at_time_once(self, time_info):
    """! Runs the MET application for a given time and forecast lead combination
          Args:
            @param time_info dictionary containing timing information
    """
    valid_time = time_info['valid']
    input_dir = self.config.getdir('GEMPAKTOCF_INPUT_DIR')
    input_template = self.config.getraw('filename_templates',
                                        'GEMPAKTOCF_INPUT_TEMPLATE')
    output_dir = self.config.getdir('GEMPAKTOCF_OUTPUT_DIR')
    output_template = self.config.getraw('filename_templates',
                                         'GEMPAKTOCF_OUTPUT_TEMPLATE')

    gsts = sts.StringSub(self.logger, input_template, valid=valid_time)
    infile = os.path.join(input_dir, gsts.do_string_sub())
    self.infiles.append(infile)

    gsts = sts.StringSub(self.logger, output_template, valid=valid_time)
    outfile = os.path.join(output_dir, gsts.do_string_sub())

    if os.path.exists(outfile) and \
            self.config.getbool('config',
                                'GEMPAKTOCF_SKIP_IF_OUTPUT_EXISTS',
                                False) is True:
        # BUGFIX: message previously told the user to set the skip flag to
        # True to process; processing requires setting it to False
        self.logger.debug('Skip writing output file {} because it already '
                          'exists. Remove file or change '
                          'GEMPAKTOCF_SKIP_IF_OUTPUT_EXISTS to False to '
                          'process'.format(outfile))
        return

    self.set_output_path(outfile)
    if not os.path.exists(os.path.dirname(outfile)):
        os.makedirs(os.path.dirname(outfile))

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return
    self.build()
def get_verification_mask(self, time_info):
    """!Fill in the verification mask filename from the configured
        template, if one was set; otherwise leave the mask empty."""
    template = self.c_dict['VERIFICATION_MASK_TEMPLATE']
    mask_filename = ''
    if template != '':
        sub = sts.StringSub(self.logger, template, **time_info)
        mask_filename = sub.do_string_sub()
    self.c_dict['VERIFICATION_MASK'] = mask_filename
def find_input_file(self, in_template, search_time, search_accum, data_src):
    """!Fill in the input template for the given time and accumulation,
        then run the result through preprocess_file to locate/convert it.
        @return path usable as input, or None if no file was found"""
    # templates express accumulation level in seconds
    level_seconds = int(search_accum) * 3600
    sub = sts.StringSub(self.logger, in_template,
                        valid=search_time,
                        level=level_seconds)
    candidate = os.path.join(self.input_dir, sub.do_string_sub())
    datatype = self.c_dict[data_src + '_INPUT_DATATYPE']
    return util.preprocess_file(candidate, datatype, self.config)
def run_at_time_once(self, valid_time, accum, ob_type):
    """!Run regrid_data_plane to regrid one accumulation bucket file
        onto the verification grid.
        Args:
          @param valid_time valid time string (YYYYMMDD...)
          @param accum accumulation amount, zero-padded to 2 digits
          @param ob_type observation data type prefix for config lookups
    """
    obs_var = self.p.getstr('config', ob_type + "_VAR")
    bucket_dir = self.p.getstr('config', ob_type + '_BUCKET_DIR')
    bucket_template = self.p.getraw('filename_templates',
                                    ob_type + '_BUCKET_TEMPLATE')
    regrid_dir = self.p.getstr('config', ob_type + '_REGRID_DIR')
    regrid_template = self.p.getraw('filename_templates',
                                    ob_type + '_REGRID_TEMPLATE')
    # create date (YYYYMMDD) subdirectory of the output dir if needed
    ymd_v = valid_time[0:8]
    if not os.path.exists(os.path.join(regrid_dir, ymd_v)):
        os.makedirs(os.path.join(regrid_dir, ymd_v))
    pcpSts = sts.StringSub(self.logger, bucket_template,
                           valid=valid_time,
                           accum=str(accum).zfill(2))
    outfile = os.path.join(bucket_dir, pcpSts.doStringSub())
    self.add_input_file(outfile)
    # second input is the grid to regrid onto
    self.add_input_file(self.p.getstr('config', 'VERIFICATION_GRID'))
    regridSts = sts.StringSub(self.logger, regrid_template,
                              valid=valid_time,
                              accum=str(accum).zfill(2))
    regrid_file = regridSts.doStringSub()
    self.set_output_path(os.path.join(regrid_dir, regrid_file))
    # field name is <var>_<accum>, e.g. APCP_06
    field_name = "{:s}_{:s}".format(obs_var, str(accum).zfill(2))
    self.add_arg(
        "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    self.add_arg("-method BUDGET")
    self.add_arg("-width 2")
    self.add_arg("-name " + field_name)
    cmd = self.get_command()
    if cmd is None:
        # report through the logger so the error reaches the log files
        self.logger.error("regrid_data_plane could not generate command")
        return
    self.logger.info("")
    self.build()
    self.clear()
def run_at_time_once(self, valid_time, level, compare_var):
    """!Run regrid_data_plane to put observation data on the verification
        grid for a single valid time / level / variable combination.
        Args:
          @param valid_time valid time string (YYYYMMDD...)
          @param level vertical level or accumulation, zero-padded to 2 digits
          @param compare_var variable name used to build the output field name
    """
    # NOTE(review): input and output filename templates both read
    # OBS_REGRID_DATA_PLANE_TEMPLATE — confirm this is intentional
    bucket_dir = self.p.getdir('OBS_REGRID_DATA_PLANE_INPUT_DIR')
    input_template = self.p.getraw('filename_templates',
                                   'OBS_REGRID_DATA_PLANE_TEMPLATE')
    regrid_dir = self.p.getdir('OBS_REGRID_DATA_PLANE_OUTPUT_DIR')
    regrid_template = self.p.getraw('filename_templates',
                                    'OBS_REGRID_DATA_PLANE_TEMPLATE')
    # create date (YYYYMMDD) subdirectory of the output dir if needed
    ymd_v = valid_time[0:8]
    if not os.path.exists(os.path.join(regrid_dir, ymd_v)):
        os.makedirs(os.path.join(regrid_dir, ymd_v))
    # fill in the input template to locate the file to regrid
    pcpSts = sts.StringSub(self.logger, input_template,
                           valid=valid_time,
                           level=str(level).zfill(2))
    outfile = os.path.join(bucket_dir, pcpSts.doStringSub())
    self.add_input_file(outfile)
    # second input is the grid to regrid onto
    self.add_input_file(self.p.getstr('config', 'VERIFICATION_GRID'))
    # fill in the output template to build the output path
    regridSts = sts.StringSub(self.logger, regrid_template,
                              valid=valid_time,
                              level=str(level).zfill(2))
    regrid_file = regridSts.doStringSub()
    self.set_output_path(os.path.join(regrid_dir, regrid_file))
    # field name is <var>_<level>, e.g. APCP_06
    field_name = "{:s}_{:s}".format(compare_var, str(level).zfill(2))
    self.add_arg(
        "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    self.add_arg("-method BUDGET")
    self.add_arg("-width 2")
    self.add_arg("-name " + field_name)
    cmd = self.get_command()
    if cmd is None:
        self.logger.error("regrid_data_plane could not generate command")
        return
    self.logger.info("")
    self.build()
    self.clear()
def get_daily_file(self, time_info, accum, data_src, file_template):
    """!Pull accumulation out of file that contains a full day of data
        Args:
          @param time_info dictionary with time information; 'valid' is
                 the datetime to search from
          @param accum accumulation to extract from file
          @param data_src type of data (FCST or OBS)
          @param file_template filename template to search
        @rtype bool
        @return True if file was added to output list, False if not"""
    data_interval = self.c_dict[data_src + '_DATA_INTERVAL']
    times_per_file = self.c_dict[data_src + '_TIMES_PER_FILE']
    search_file = None
    # loop from valid_time back to data interval * times per file
    for i in range(0, times_per_file + 1):
        search_time = time_info['valid'] - datetime.timedelta(
            hours=(i * data_interval))
        # check if file exists
        dSts = sts.StringSub(self.logger, file_template,
                             valid=search_time)
        search_file = os.path.join(self.input_dir,
                                   dSts.do_string_sub())
        search_file = util.preprocess_file(
            search_file,
            self.c_dict[data_src + '_INPUT_DATATYPE'],
            self.config)
        if search_file is not None:
            break

    # identity comparison is the idiomatic None check (was == None)
    if search_file is None:
        return False

    diff = time_info['valid'] - search_time
    # Specifying integer division // Python 3,
    # assuming that was the intent in Python 2.
    lead = int((diff.days * 24) // (data_interval))
    lead += int((diff).seconds // (data_interval * 3600)) - 1
    # calling config.conf version of getter so default value is not
    # set in log and final conf because it is unnecessary
    fname = self.config.conf.getstr(
        'config',
        data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME', '')
    if fname == '':
        self.logger.error(
            'NetCDF field name was not set in config: {}'.format(
                data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME'))
        return False
    addon = "'name=\"" + fname + "\"; level=\"(" + \
            str(lead) + ",*,*)\";'"
    self.add_input_file(search_file, addon)
    return True
def setup_add_method(self, time_info, var_info, data_src):
    """!Setup pcp_combine to add files to build desired accumulation
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param data_src data type (FCST or OBS)
          @rtype string
          @return command to run, or None if inputs could not be found"""
    self.clear()
    self.set_method("ADD")

    # forecast inputs are searched lowest-lead first
    is_forecast = data_src == "FCST"
    if is_forecast:
        accum = var_info.fcst_level
        compare_var = var_info.fcst_name
    else:
        accum = var_info.obs_level
        compare_var = var_info.obs_name

    # strip leading level-type character, e.g. A06 -> 06
    if accum[0].isalpha():
        accum = accum[1:]

    in_dir, in_template = self.get_dir_and_template(data_src, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(data_src, 'OUTPUT')

    # check _PCP_COMBINE_INPUT_DIR to get accumulation files
    self.set_input_dir(in_dir)
    if not self.get_accumulation(time_info, int(accum),
                                 data_src, is_forecast):
        return None

    self.set_output_dir(out_dir)
    # filename templates express the accumulation level in seconds
    time_info['level'] = int(accum) * 3600
    pcpSts = sts.StringSub(self.logger, out_template, **time_info)
    pcp_out = pcpSts.doStringSub()
    self.set_output_filename(pcp_out)
    self.add_arg("-name " + compare_var + "_" + str(accum).zfill(2))
    return self.get_command()
def setup_sum_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to build desired accumulation based on
        init/valid times and accumulations
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param rl data type (FCST or OBS)
          @rtype string
          @return command to run"""
    self.clear()

    # input accumulation of -1 means "unset"; treat it as zero
    input_accum = self.c_dict[rl + '_LEVEL']
    if input_accum == -1:
        input_accum = 0

    input_dir, input_template = self.get_dir_and_template(rl, 'INPUT')
    output_dir, output_template = self.get_dir_and_template(rl, 'OUTPUT')

    # strip leading level-type character, e.g. A06 -> 06
    output_accum = var_info.obs_level
    if output_accum[0].isalpha():
        output_accum = output_accum[1:]

    init_time = time_info['init_fmt']
    valid_time = time_info['valid_fmt']

    # filename templates express the accumulation level in seconds
    time_info['level'] = int(output_accum) * 3600

    # convert the input template into a regex; any directory components
    # in the pattern become part of the search directory
    input_regex = util.template_to_regex(input_template, time_info,
                                         self.logger)
    regex_parts = input_regex.split('/')
    input_dir = os.path.join(input_dir, *regex_parts[0:-1])
    input_regex = regex_parts[-1]

    self.set_method("SUM")
    self.set_init_time(init_time)
    self.set_valid_time(valid_time)
    self.set_in_accum(input_accum)
    self.set_out_accum(output_accum)
    self.set_pcp_dir(input_dir)
    self.set_pcp_regex(input_regex)
    self.set_output_dir(output_dir)

    output_name = sts.StringSub(self.logger, output_template,
                                **time_info).doStringSub()
    self.set_output_filename(output_name)
    return self.get_command()
def setup_derive_method(self, time_info, var_info, data_src):
    """!Setup pcp_combine to derive stats
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param data_src data type (FCST or OBS)
          @rtype string
          @return command to run, or None if inputs could not be found"""
    is_forecast = data_src == "FCST"
    self.clear()
    self.set_method("DERIVE")

    # set field info
    if data_src == "FCST":
        self.field_level = var_info.fcst_level
        self.field_name = var_info.fcst_name
        self.field_extra = var_info.fcst_extra
    else:
        self.field_level = var_info.obs_level
        self.field_name = var_info.obs_name
        self.field_extra = var_info.obs_extra

    in_dir, in_template = self.get_dir_and_template(data_src, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(data_src, 'OUTPUT')

    # get files
    lookback = self.c_dict[data_src + '_DERIVE_LOOKBACK']
    if not self.get_accumulation(time_info, lookback,
                                 data_src, is_forecast):
        return None

    # set output
    self.set_output_dir(out_dir)
    # BUGFIX: original referenced undefined name 'accum' (NameError);
    # the accumulation used for derive is the lookback, in seconds
    time_info['level'] = int(lookback) * 3600
    pcpSts = sts.StringSub(self.logger, out_template, **time_info)
    pcp_out = pcpSts.doStringSub()
    self.set_output_filename(pcp_out)
    return self.get_command()
def find_model(self, lead, init_time, level, cur_model):
    """!Search backwards through init times for a model file, increasing
        the forecast lead to keep the same valid time. Gzipped files are
        decompressed in place and used.
        Args:
          @param lead starting forecast lead
          @param init_time starting initialization time
          @param level vertical level, possibly a range such as 0-10
          @param cur_model tuple of (filename template, model type,
                 max forecast) for the current model
        @rtype string
        @return path to the model file, or empty string if not found"""
    model_dir = self.p.getstr('config', 'FCST_GRID_STAT_INPUT_DIR')
    # maximum forecast lead comes from the current model entry
    max_forecast = cur_model[2]
    init_interval = self.p.getint('config', 'FCST_INIT_INTERVAL')
    lead_check = lead
    time_check = init_time
    found = False
    while lead_check <= max_forecast:
        model_template = cur_model[0]
        # split by - to handle a level that is a range, such as 0-10
        model_ss = sts.StringSub(self.logger, model_template,
                                 init=time_check,
                                 lead=str(lead_check).zfill(2),
                                 level=str(level.split('-')[0]).zfill(2))
        model_file = model_ss.doStringSub()
        model_path = os.path.join(model_dir, model_file)
        if os.path.exists(model_path):
            found = True
            break
        if os.path.exists(model_path + ".gz"):
            # decompress to the expected (non-gz) path and use that file
            with gzip.open(model_path + ".gz", 'rb') as infile:
                with open(model_path, 'wb') as outfile:
                    outfile.write(infile.read())
            # BUGFIX: original never marked the decompressed file as found
            # and kept searching earlier init times (the in-code TODO)
            found = True
            break
        # try the previous init time with a correspondingly longer lead
        time_check = util.shift_time(time_check, -init_interval)
        lead_check = lead_check + init_interval
    if found:
        return model_path
    else:
        return ''
def find_model_members(self, time_info):
    """! Finds the model member files to compare
          Args:
            @param time_info dictionary containing timing information
            @rtype string
            @return path to a file containing the list of ensemble member
                    files, or False if too many files matched
    """
    model_dir = self.c_dict['FCST_INPUT_DIR']
    # used for filling in missing files to ensure ens_thresh check is accurate
    fake_dir = '/ensemble/member/is/missing'

    # model_template is a list of 1 or more.
    ens_members_path = []

    # get all files that exist
    for ens_member_template in self.c_dict['FCST_INPUT_TEMPLATE']:
        model_ss = sts.StringSub(self.logger, ens_member_template,
                                 **time_info)
        member_file = model_ss.do_string_sub()
        expected_path = os.path.join(model_dir, member_file)

        # if wildcard expression, get all files that match
        if '?' in expected_path:
            wildcard_files = sorted(glob.glob(expected_path))
            self.logger.debug(
                'Ensemble members file pattern: {}'.format(expected_path))
            self.logger.debug('{} members match file pattern'.format(
                str(len(wildcard_files))))

            # add files to list of ensemble members
            for wildcard_file in wildcard_files:
                ens_members_path.append(wildcard_file)
        else:
            # otherwise check if file exists
            expected_path = util.preprocess_file(
                expected_path,
                self.c_dict['FCST_INPUT_DATATYPE'],
                self.config)

            # if the file exists, add it to the list (was != None)
            if expected_path is not None:
                ens_members_path.append(expected_path)
            else:
                # add relative path to fake dir and add to list
                ens_members_path.append(os.path.join(fake_dir,
                                                     member_file))
                self.logger.warning(
                    'Expected ensemble file {} not found'.format(
                        member_file))

    # if more files found than expected, error and exit
    if len(ens_members_path) > self.c_dict['N_MEMBERS']:
        msg = ('Found more files than expected! '
               'Found {} expected {}. '
               'Adjust wildcard expression in [filename_templates] '
               'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE or adjust [config] '
               'ENSEMBLE_STAT_N_MEMBERS. \nFiles found: {}'.format(
                   len(ens_members_path),
                   self.c_dict['N_MEMBERS'],
                   ens_members_path))
        self.logger.error(msg)
        # BUGFIX: corrected typo 'Could not file files'
        self.logger.error(
            "Could not find files in {} for init {} f{} ".format(
                model_dir, time_info['init_fmt'],
                str(time_info['lead_hours'])))
        return False
    # if fewer files found than expected, warn and add fake files
    elif len(ens_members_path) < self.c_dict['N_MEMBERS']:
        msg = ('Found fewer files than expected. '
               'Found {} expected {}.'.format(len(ens_members_path),
                                              self.c_dict['N_MEMBERS']))
        self.logger.warning(msg)
        # add fake files to list to get correct number of files for ens_thresh
        diff = self.c_dict['N_MEMBERS'] - len(ens_members_path)
        self.logger.warning('Adding {} fake files to '.format(str(diff)) +
                            'ensure ens_thresh check is accurate')
        for _ in range(0, diff, 1):
            ens_members_path.append(fake_dir)

    # write file that contains list of ensemble files
    list_filename = time_info['init_fmt'] + '_' + \
        str(time_info['lead_hours']) + '_ensemble.txt'
    return self.write_list_file(list_filename, ens_members_path)
def get_accumulation(self, valid_time, accum, ob_type, is_forecast=False):
    """!Gather the input files needed to build the desired accumulation
        and add them (with level/name addons) to the pcp_combine input
        list.
        Args:
          @param valid_time valid time string to search from
          @param accum total accumulation (hours) to build
          @param ob_type data type prefix used for config lookups
          @param is_forecast True to search forecast files at the lowest
                 available lead instead of observation files
        @return None; returns early with None on error"""
    # TODO: pass in template (input/native) so this isn't assumed
    file_template = self.p.getraw('filename_templates',
                                  ob_type + "_INPUT_TEMPLATE")

    if self.input_dir == "":
        self.logger.error(self.app_name +
                          ": Must set data dir to run get_accumulation")
        # BUGFIX: original used a bare 'exit' (a no-op expression that
        # does not call the function) and fell through; abort instead
        return None

    self.add_arg("-add")

    if self.p.getbool('config', ob_type + '_IS_DAILY_FILE') is True:
        # loop accum times
        data_interval = self.p.getint('config',
                                      ob_type + '_DATA_INTERVAL') * 3600
        for i in range(0, accum, data_interval):
            search_time = util.shift_time(valid_time, -i)
            # find closest file before time
            f = self.find_closest_before(self.input_dir, search_time,
                                         file_template)
            if f == "":
                continue
            # build level info string
            # NOTE(review): assumes the timestamp occupies a fixed slice
            # of the filename — confirm against the daily file templates
            file_time = datetime.datetime.strptime(f[-18:-8], "%Y%m%d%H")
            v_time = datetime.datetime.strptime(search_time, "%Y%m%d%H")
            diff = v_time - file_time
            lead = int((diff.days * 24) / (data_interval / 3600))
            lead += int((v_time - file_time).seconds / data_interval) - 1
            fname = self.p.getstr(
                'config', ob_type + '_' + str(accum) + '_FIELD_NAME')
            addon = "'name=\"" + fname + "\"; level=\"(" + \
                    str(lead) + ",*,*)\";'"
            self.add_input_file(f, addon)
    else:  # not a daily file
        # if field that corresponds to search accumulation exists
        # in the files,
        # check the file with valid time before moving backwards in time
        if self.p.has_option(
                'config', ob_type + '_' + str(accum) +
                '_FIELD_NAME') and ob_type != "NATIONAL_BLEND":
            fSts = sts.StringSub(self.logger, file_template,
                                 valid=valid_time,
                                 accum=str(accum).zfill(2))
            # TODO: This assumes max 99 accumulation.
            # zfill to 3 if above that is possible
            search_file = os.path.join(self.input_dir,
                                       fSts.doStringSub())
            if os.path.exists(search_file):
                data_type = self.p.getstr('config',
                                          ob_type + '_NATIVE_DATA_TYPE')
                if data_type == "GRIB":
                    addon = accum
                elif data_type == "NETCDF":
                    fname = self.p.getstr(
                        'config',
                        ob_type + '_' + str(accum) + '_FIELD_NAME')
                    addon = "'name=\"" + fname + \
                            "\"; level=\"(0,*,*)\";'"
                self.add_input_file(search_file, addon)
                self.set_output_dir(self.outdir)
                return

        start_time = valid_time
        last_time = util.shift_time(valid_time, -(int(accum) - 1))
        total_accum = int(accum)
        # search_accum = total_accum
        search_accum = self.p.getint('config', ob_type + '_ACCUM')

        # loop backwards in time until you have a full set of accum
        while last_time <= start_time:
            if is_forecast:
                f = self.get_lowest_forecast_at_valid(start_time, ob_type)
                if f == "":
                    break
                # TODO: assumes 1hr accum (6 for NB) in these files for now
                if ob_type == "NATIONAL_BLEND":
                    ob_str = self.p.getstr(
                        'config', ob_type + '_' + str(6) + '_FIELD_NAME')
                    addon = "'name=\"" + ob_str + \
                            "\"; level=\"(0,*,*)\";'"
                else:
                    ob_str = self.p.getstr(
                        'config', ob_type + '_' + str(1) + '_FIELD_NAME')
                    addon = "'name=\"" + ob_str + \
                            "\"; level=\"(0,*,*)\";'"
                self.add_input_file(f, addon)
                start_time = util.shift_time(start_time, -1)
                search_accum -= 1
            else:  # not looking for forecast files
                # get all files of valid_time (all accums)
                # report through the logger instead of print
                self.logger.debug("INPUTDIR IS:" + self.input_dir +
                                  " and START TIME IS:" + start_time)
                files = sorted(
                    glob.glob("{:s}/*{:s}*".format(self.input_dir,
                                                   start_time)))
                # look for biggest accum that fits search
                while search_accum > 0:
                    fSts = sts.StringSub(self.logger, file_template,
                                         valid=start_time,
                                         accum=str(search_accum).zfill(2))
                    search_file = os.path.join(self.input_dir,
                                               fSts.doStringSub())
                    f = None
                    for file in files:
                        if file == search_file:
                            f = file
                            break
                    # if found a file, add it to input list with info
                    if f is not None:
                        addon = ""
                        data_type = self.p.getstr(
                            'config', ob_type + '_NATIVE_DATA_TYPE')
                        if data_type == "GRIB":
                            addon = search_accum
                        elif data_type == "NETCDF":
                            ob_str = self.p.getstr(
                                'config', ob_type + '_' +
                                str(search_accum) + '_FIELD_NAME')
                            addon = "'name=\"" + ob_str + \
                                    "\"; level=\"(0,*,*)\";'"
                        self.add_input_file(f, addon)
                        start_time = util.shift_time(start_time + "00",
                                                     -search_accum)[0:10]
                        total_accum -= search_accum
                        # search_accum = total_accum
                        break
                    search_accum -= 1

            if total_accum == 0:
                break

            if search_accum == 0:
                self.logger.warning(self.app_name + ": Could not find " \
                                    "files to compute accumulation for " \
                                    + ob_type)
                return None

    self.set_output_dir(self.outdir)
def run_at_time_once(self, ti, v, cur_model):
    """!Run grid_stat for a single time / variable / model combination.
        Builds the FCST_FIELD/OBS_FIELD strings and threshold lists,
        exports them as environment variables for the MET config file,
        then runs grid_stat.
        Args:
          @param ti task info object with timing information
          @param v var info object with fcst/obs variable information
          @param cur_model tuple; index 0 is the filename template and
                 index 1 the model type name (index 2 is used by
                 find_model as the max forecast)
    """
    valid_time = ti.getValidTime()
    init_time = ti.getInitTime()
    grid_stat_base_dir = self.p.getstr('config', 'GRID_STAT_OUT_DIR')
    # output goes under an init-time or valid-time subdirectory
    # depending on how the run loops over times
    if self.p.getbool('config', 'LOOP_BY_INIT'):
        grid_stat_out_dir = os.path.join(grid_stat_base_dir,
                                         init_time, "grid_stat")
    else:
        grid_stat_out_dir = os.path.join(grid_stat_base_dir,
                                         valid_time, "grid_stat")
    # split each level into its type prefix (leading letter) and value
    fcst_level = v.fcst_level
    fcst_level_type = ""
    if (fcst_level[0].isalpha()):
        fcst_level_type = fcst_level[0]
        fcst_level = fcst_level[1:]
    obs_level = v.obs_level
    obs_level_type = ""
    if (obs_level[0].isalpha()):
        obs_level_type = obs_level[0]
        obs_level = obs_level[1:]
    #model_type = self.p.getstr('config', 'MODEL_TYPE')
    model_type = cur_model[1]
    obs_dir = self.p.getstr('config', 'OBS_GRID_STAT_INPUT_DIR')
    obs_template = self.p.getraw('filename_templates',
                                 'OBS_GRID_STAT_INPUT_TEMPLATE')
    model_dir = self.p.getstr('config', 'FCST_GRID_STAT_INPUT_DIR')
    config_dir = self.p.getstr('config', 'CONFIG_DIR')
    ymd_v = valid_time[0:8]
    if not os.path.exists(grid_stat_out_dir):
        os.makedirs(grid_stat_out_dir)

    # get model to compare
    model_path = self.find_model(ti.lead, init_time, fcst_level,
                                 cur_model)
    if model_path == "":
        print("ERROR: COULD NOT FIND FILE IN " + model_dir)
        return
    self.add_input_file(model_path)

    # TODO: Handle range of levels
    obsSts = sts.StringSub(self.logger, obs_template,
                           valid=valid_time,
                           init=init_time,
                           level=str(obs_level.split('-')[0]).zfill(2))
    obs_file = obsSts.doStringSub()
    obs_path = os.path.join(obs_dir, obs_file)
    self.add_input_file(obs_path)
    self.set_param_file(self.p.getstr('config', 'GRID_STAT_CONFIG'))
    self.set_output_dir(grid_stat_out_dir)

    # set up environment variables for each grid_stat run
    # get fcst and obs thresh parameters
    # verify they are the same size
    fcst_str = "FCST_" + v.fcst_name + "_" + fcst_level + "_THRESH"
    obs_str = "OBS_" + v.obs_name + "_" + obs_level + "_THRESH"
    fcst_cat_thresh = ""
    obs_cat_thresh = ""
    fcst_threshs = []
    obs_threshs = []
    # build cat_thresh strings of the form: cat_thresh=[ gt0.1, gt0.5 ];
    if self.p.has_option('config', fcst_str):
        fcst_threshs = util.getlistfloat(self.p.getstr('config',
                                                       fcst_str))
        fcst_cat_thresh = "cat_thresh=[ "
        for fcst_thresh in fcst_threshs:
            fcst_cat_thresh += "gt" + str(fcst_thresh) + ", "
        fcst_cat_thresh = fcst_cat_thresh[0:-2] + " ];"
    if self.p.has_option('config', obs_str):
        obs_threshs = util.getlistfloat(self.p.getstr('config',
                                                      obs_str))
        obs_cat_thresh = "cat_thresh=[ "
        for obs_thresh in obs_threshs:
            obs_cat_thresh += "gt" + str(obs_thresh) + ", "
        obs_cat_thresh = obs_cat_thresh[0:-2] + " ];"
    if len(fcst_threshs) != len(obs_threshs):
        # NOTE(review): message says 'run_example' but this is the
        # grid_stat wrapper — looks copy-pasted; confirm and fix
        self.logger.error("run_example: Number of forecast and "\
                          "observation thresholds must be the same")
        exit(1)

    # TODO: Allow NetCDF level with more than 2 dimensions i.e. (1,*,*)
    # TODO: Need to check data type for PROB fcst? non PROB obs?
    fcst_field = ""
    obs_field = ""
    # TODO: change PROB mode to put all cat thresh values in 1 item
    if self.p.getbool('config', 'FCST_IS_PROB'):
        # probabilistic forecast: one field entry per threshold
        for fcst_thresh in fcst_threshs:
            fcst_field += "{ name=\"PROB\"; level=\""+fcst_level_type + \
                          fcst_level.zfill(2) + "\"; prob={ name=\"" + \
                          v.fcst_name + \
                          "\"; thresh_lo="+str(fcst_thresh)+"; } },"
        for obs_thresh in obs_threshs:
            obs_field += "{ name=\""+v.obs_name+"_"+obs_level.zfill(2) + \
                         "\"; level=\"(*,*)\"; cat_thresh=[ gt" + \
                         str(obs_thresh)+" ]; },"
    else:
        # data_type = self.p.getstr('config', 'OBS_NATIVE_DATA_TYPE')
        obs_data_type = util.get_filetype(self.p, obs_path)
        model_data_type = util.get_filetype(self.p, model_path)
        # NetCDF fields are named <var>_<level> on a 2D (*,*) level;
        # other file types use the level type/value directly
        if obs_data_type == "NETCDF":
            obs_field += "{ name=\"" + v.obs_name+"_" + \
                         obs_level.zfill(2) + \
                         "\"; level=\"(*,*)\"; "
        else:
            obs_field += "{ name=\""+v.obs_name + \
                         "\"; level=\"["+obs_level_type + \
                         obs_level.zfill(2)+"]\"; "
        if model_data_type == "NETCDF":
            fcst_field += "{ name=\""+v.fcst_name+"_" + \
                          fcst_level.zfill(2) + \
                          "\"; level=\"(*,*)\"; "
        else:
            fcst_field += "{ name=\""+v.fcst_name + \
                          "\"; level=\"["+fcst_level_type + \
                          fcst_level.zfill(2)+"]\"; "
        fcst_field += fcst_cat_thresh + " },"
        # obs_field += "{ name=\"" + v.obs_name+"_" + obs_level.zfill(2) + \
        #              "\"; level=\"(*,*)\"; "
        obs_field += obs_cat_thresh + " },"

    # remove last comma and } to be added back after extra options
    fcst_field = fcst_field[0:-2]
    obs_field = obs_field[0:-2]
    fcst_field += v.fcst_extra + "}"
    obs_field += v.obs_extra + "}"

    ob_type = self.p.getstr('config', "OB_TYPE")
    # build verification polygon list string: [ "poly1", "poly2" ]
    verif_polys = util.getlist(self.p.getstr('config',
                                             "VERIFICATION_POLY"))
    verif_poly = "["
    for vp in verif_polys:
        verif_poly += "\"" + vp + "\", "
    verif_poly = os.path.expandvars(verif_poly[0:-2] + "]")

    self.add_env_var("MODEL", model_type)
    self.add_env_var("FCST_VAR", v.fcst_name)
    self.add_env_var("OBS_VAR", v.obs_name)
    # TODO: Change ACCUM to LEVEL in GridStatConfig_MEAN/PROB and here
    self.add_env_var("ACCUM", v.fcst_level)
    self.add_env_var("OBTYPE", ob_type)
    self.add_env_var("CONFIG_DIR", config_dir)
    self.add_env_var("FCST_FIELD", fcst_field)
    self.add_env_var("OBS_FIELD", obs_field)
    self.add_env_var("MET_VALID_HHMM", valid_time[4:8])
    self.add_env_var("VERIF_POLY", verif_poly)
    cmd = self.get_command()

    self.logger.debug("")
    self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_item("MODEL")
    self.print_env_item("FCST_VAR")
    self.print_env_item("OBS_VAR")
    self.print_env_item("ACCUM")
    self.print_env_item("OBTYPE")
    self.print_env_item("CONFIG_DIR")
    self.print_env_item("FCST_FIELD")
    self.print_env_item("OBS_FIELD")
    self.print_env_item("MET_VALID_HHMM")
    self.print_env_item("VERIF_POLY")
    self.logger.debug("")
    self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_copy([
        "MODEL", "FCST_VAR", "OBS_VAR", "ACCUM", "OBTYPE", "CONFIG_DIR",
        "FCST_FIELD", "OBS_FIELD", "MET_VALID_HHMM"
    ])
    self.logger.debug("")
    cmd = self.get_command()
    if cmd is None:
        print("ERROR: grid_stat could not generate command")
        return
    self.logger.info("")
    self.build()
    self.clear()
def run_at_time_once(self, time_info, var_info, dtype):
    """! Runs the MET application for a given time and forecast lead combination
          Args:
            @param time_info dictionary containing timing information
            @param var_info dictionary containing variable information
            @param dtype data type (FCST or OBS)
    """
    self.clear()

    # pick the name/level for the data type being processed
    if dtype == "FCST":
        compare_var = var_info['fcst_name']
        level = var_info['fcst_level']
    else:
        compare_var = var_info['obs_name']
        level = var_info['obs_level']

    # keep only the numeric portion of the level
    level = util.split_level(level)[1]

    # all four directory/template settings are required
    if self.c_dict[dtype + '_INPUT_DIR'] == '':
        self.logger.error(
            'Must set {}_REGRID_DATA_PLANE_INPUT_DIR'.format(dtype) +
            ' in config file')
        exit(1)
    if self.c_dict[dtype + '_INPUT_TEMPLATE'] == '':
        self.logger.error(
            'Must set {}_REGRID_DATA_PLANE_INPUT_TEMPLATE'.format(dtype) +
            ' in config file')
        exit(1)
    if self.c_dict[dtype + '_OUTPUT_DIR'] == '':
        self.logger.error(
            'Must set {}_REGRID_DATA_PLANE_OUTPUT_DIR'.format(dtype) +
            ' in config file')
        exit(1)
    if self.c_dict[dtype + '_OUTPUT_TEMPLATE'] == '':
        self.logger.error(
            'Must set {}_REGRID_DATA_PLANE_OUTPUT_TEMPLATE'.format(dtype) +
            ' in config file')
        exit(1)

    input_dir = self.c_dict[dtype + '_INPUT_DIR']
    input_template = self.c_dict[dtype + '_INPUT_TEMPLATE']
    output_dir = self.c_dict[dtype + '_OUTPUT_DIR']
    output_template = self.c_dict[dtype + '_OUTPUT_TEMPLATE']

    # non-numeric levels are templated as level 0
    if not level.isdigit():
        f_level = '0'
    else:
        f_level = level

    string_sub = sts.StringSub(self.logger, input_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    infile = os.path.join(input_dir, string_sub.do_string_sub())
    infile = util.preprocess_file(
        infile,
        self.config.getstr('config',
                           dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                           ''),
        self.config)
    if infile is not None:
        self.infiles.append(infile)
    else:
        self.logger.error(
            'Could not find input file in {} matching template {}'.format(
                input_dir, input_template))
        return False

    verif_grid = self.c_dict['VERIFICATION_GRID']
    if verif_grid == '':
        self.logger.error('No verification grid specified! ' +
                          'Set REGRID_DATA_PLANE_VERIF_GRID')
        return False
    self.infiles.append(verif_grid)

    string_sub = sts.StringSub(self.logger, output_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    outfile = string_sub.do_string_sub()
    self.set_output_path(os.path.join(output_dir, outfile))

    outpath = self.get_output_path()
    if os.path.exists(outpath) and \
            self.c_dict['SKIP_IF_OUTPUT_EXISTS'] is True:
        # BUGFIX: message previously told the user to set the skip flag
        # to True to process; processing requires setting it to False
        self.logger.debug(
            'Skip writing output file {} because it already '
            'exists. Remove file or change '
            'REGRID_DATA_PLANE_SKIP_IF_OUTPUT_EXISTS to False to process'.
            format(outpath))
        return True

    # NetCDF (or unspecified) input uses <var>_<level> field naming on a
    # 2D (*,*) level; other types pass the level string through directly
    if self.config.getstr('config',
                          dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                          '') in ['', 'NETCDF']:
        field_name = "{:s}_{:s}".format(compare_var, str(level).zfill(2))
        self.args.append(
            "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    else:
        field_name = "{:s}".format(compare_var)
        self.args.append("-field 'name=\"{:s}\"; level=\"{:s}\";'".format(
            field_name, level))

    if self.c_dict['METHOD'] != '':
        self.args.append("-method {}".format(self.c_dict['METHOD']))

    self.args.append("-width {}".format(self.c_dict['WIDTH']))
    self.args.append("-name " + field_name)

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return
    self.build()
def run_at_time_once(self, valid_time, accum, ob_type, fcst_var,
                     is_forecast=False):
    """!Run pcp_combine to build the desired accumulation for one valid
        time, converting Gempak inputs to NetCDF first when necessary.
        Args:
          @param valid_time valid time string (YYYYMMDDHH...)
          @param accum accumulation to build
          @param ob_type data type prefix for config lookups
          @param fcst_var forecast variable name (currently unused; kept
                 for interface compatibility)
          @param is_forecast True to search forecast files
    """
    input_dir = self.p.getstr('config', ob_type + '_INPUT_DIR')
    native_dir = self.p.getstr('config', ob_type + '_NATIVE_DIR')
    bucket_dir = self.p.getstr('config', ob_type + '_BUCKET_DIR')
    bucket_template = self.p.getraw('filename_templates',
                                    ob_type + '_BUCKET_TEMPLATE')
    # create date (YYYYMMDD) subdirectories as needed
    ymd_v = valid_time[0:8]
    if ob_type != "QPE":
        if not os.path.exists(os.path.join(native_dir, ymd_v)):
            os.makedirs(os.path.join(native_dir, ymd_v))
    if not os.path.exists(os.path.join(bucket_dir, ymd_v)):
        os.makedirs(os.path.join(bucket_dir, ymd_v))

    self.set_input_dir(input_dir)
    self.set_output_dir(bucket_dir)
    self.get_accumulation(valid_time[0:10], int(accum), ob_type,
                          is_forecast)

    # call GempakToCF if native file doesn't exist
    infiles = self.get_input_files()
    for idx, infile in enumerate(infiles):
        # replace input_dir with native_dir, check if file exists
        nfile = infile.replace(input_dir, native_dir)
        data_type = self.p.getstr('config',
                                  ob_type + '_NATIVE_DATA_TYPE')
        if data_type == "NETCDF":
            nfile = os.path.splitext(nfile)[0] + '.nc'
        if not os.path.isfile(nfile):
            # report through the logger so output lands in the log files
            self.logger.info("Calling GempakToCF to convert to NetCDF")
            run_g2c = GempakToCFWrapper(self.p, self.logger)
            run_g2c.add_input_file(infile)
            run_g2c.set_output_path(nfile)
            cmd = run_g2c.get_command()
            if cmd is None:
                self.logger.error(
                    "GempakToCF could not generate command")
                continue
            run_g2c.build()
        infiles[idx] = nfile

    pcpSts = sts.StringSub(self.logger, bucket_template,
                           valid=valid_time,
                           accum=str(accum).zfill(2))
    pcp_out = pcpSts.doStringSub()
    self.set_output_filename(pcp_out)
    varname = self.p.getstr('config', ob_type + "_VAR")
    self.add_arg("-name " + varname + "_" + accum)
    cmd = self.get_command()
    if cmd is None:
        self.logger.error("pcp_combine could not generate command")
        return
    self.logger.info("")
    self.build()
    self.clear()
def run_at_time_once(self, ti):
    """!Run grid_stat for a single task (one init/valid time, one
        accumulation, one observation type).
        For HREF_MEAN/NATIONAL_BLEND models, first builds the desired
        forecast accumulation with pcp_combine (converting GEMPAK data
        to NetCDF if needed); otherwise finds an existing model file.
        Then locates the regridded observation file, builds the
        FCST_FIELD/OBS_FIELD environment variables from the configured
        thresholds, and runs grid_stat.
        Args:
          @param ti task object with timing/variable info (provides
           getValidTime(), getInitTime(), lead, level, ob_type,
           fcst_var)"""
    grid_stat_out_dir = self.p.getstr('config', 'GRID_STAT_OUT_DIR')
    valid_time = ti.getValidTime()
    init_time = ti.getInitTime()
    accum = ti.level
    model_type = self.p.getstr('config', 'MODEL_TYPE')
    regrid_dir = self.p.getstr('config', ti.ob_type + '_REGRID_DIR')
    regrid_template = self.p.getraw('filename_templates',
                                    ti.ob_type + '_REGRID_TEMPLATE')
    model_bucket_dir = self.p.getstr('config', model_type + '_BUCKET_DIR')
    obs_var = self.p.getstr('config', ti.ob_type + "_VAR")
    config_dir = self.p.getstr('config', 'CONFIG_DIR')
    ymd_v = valid_time[0:8]
    if not os.path.exists(os.path.join(grid_stat_out_dir,
                                       init_time, "grid_stat")):
        os.makedirs(os.path.join(grid_stat_out_dir, init_time, "grid_stat"))
    if not os.path.exists(os.path.join(model_bucket_dir, ymd_v)):
        os.makedirs(os.path.join(model_bucket_dir, ymd_v))

    # get model to compare
    model_dir = self.p.getstr('config', model_type + '_INPUT_DIR')

    # check if accum exists in forecast file
    # If not, run pcp_combine to create it
    # TODO: remove reliance on model_type
    if model_type == 'HREF_MEAN' or model_type == "NATIONAL_BLEND":
        native_dir = self.p.getstr('config', model_type + '_NATIVE_DIR')
        run_pcp_ob = PcpCombineWrapper(self.p, self.logger)
        run_pcp_ob.set_input_dir(model_dir)
        run_pcp_ob.set_output_dir(model_bucket_dir)
        run_pcp_ob.get_accumulation(valid_time, accum, model_type, True)

        # call GempakToCF if native file doesn't exist
        infiles = run_pcp_ob.get_input_files()
        for idx, infile in enumerate(infiles):
            # replace input_dir with native_dir, check if file exists
            nfile = infile.replace(model_dir, native_dir)
            if not os.path.exists(os.path.dirname(nfile)):
                os.makedirs(os.path.dirname(nfile))
            data_type = self.p.getstr('config',
                                      ti.ob_type + '_NATIVE_DATA_TYPE')
            if data_type == "NETCDF":
                nfile = os.path.splitext(nfile)[0] + '.nc'
                if not os.path.isfile(nfile):
                    self.logger.info(
                        "Calling GempakToCF to convert model to NetCDF")
                    run_g2c = GempakToCFWrapper(self.p, self.logger)
                    run_g2c.add_input_file(infile)
                    run_g2c.set_output_path(nfile)
                    cmd = run_g2c.get_command()
                    if cmd is None:
                        self.logger.error(
                            "GempakToCF could not generate command")
                        return
                    self.logger.info("RUNNING: " + str(cmd))
                    run_g2c.build()
            run_pcp_ob.infiles[idx] = nfile

        bucket_template = self.p.getraw('filename_templates',
                                        model_type + '_BUCKET_TEMPLATE')
        pcpSts = sts.StringSub(self.logger, bucket_template,
                               valid=valid_time,
                               accum=str(ti.level).zfill(2))
        pcp_out = pcpSts.doStringSub()
        run_pcp_ob.set_output_filename(pcp_out)
        run_pcp_ob.add_arg("-name " + ti.fcst_var + "_" + ti.level)
        cmd = run_pcp_ob.get_command()
        if cmd is None:
            self.logger.error("pcp_combine observation could not "
                              "generate command")
            return
        self.logger.info("RUNNING: " + str(cmd))
        self.logger.info("")
        run_pcp_ob.build()
        model_path = run_pcp_ob.get_output_path()
    else:
        model_path = self.find_model(model_type, ti.lead, init_time)

    if model_path == "":
        self.logger.error("COULD NOT FIND FILE IN " + model_dir)
        return
    self.add_input_file(model_path)

    # build path to the regridded observation file for this valid time
    regridSts = sts.StringSub(self.logger, regrid_template,
                              valid=valid_time,
                              accum=str(accum).zfill(2))
    regrid_file = regridSts.doStringSub()
    regrid_path = os.path.join(regrid_dir, regrid_file)
    self.add_input_file(regrid_path)

    # probabilistic and deterministic data use different MET configs
    if self.p.getbool('config', model_type + '_IS_PROB'):
        self.set_param_file(self.p.getstr('config', 'MET_CONFIG_GSP'))
    else:
        self.set_param_file(self.p.getstr('config', 'MET_CONFIG_GSM'))
    self.set_output_dir(os.path.join(grid_stat_out_dir,
                                     init_time, "grid_stat"))

    # set up environment variables for each grid_stat run
    # get fcst and obs thresh parameters
    # verify they are the same size
    fcst_str = model_type + "_" + ti.fcst_var + "_" + accum + "_THRESH"
    fcst_threshs = util.getlistfloat(self.p.getstr('config', fcst_str))
    obs_str = ti.ob_type + "_" + ti.fcst_var + "_" + accum + "_THRESH"
    obs_threshs = util.getlistfloat(self.p.getstr('config', obs_str))
    if len(fcst_threshs) != len(obs_threshs):
        self.logger.error("run_at_time_once: Number of forecast and "
                          "observation thresholds must be the same")
        # was a bare 'exit' expression (a no-op); return so processing
        # actually stops when the threshold lists are mismatched
        return

    fcst_field = ""
    obs_field = ""
    if self.p.getbool('config', model_type + '_IS_PROB'):
        for fcst_thresh in fcst_threshs:
            fcst_field += "{ name=\"PROB\"; level=\"A"+accum + \
                          "\"; prob={ name=\""+ti.fcst_var + \
                          "\"; thresh_lo="+str(fcst_thresh)+"; } },"
        for obs_thresh in obs_threshs:
            obs_field += "{ name=\""+obs_var+"_"+accum + \
                         "\"; level=\"(*,*)\"; cat_thresh=[ gt" + \
                         str(obs_thresh)+" ]; },"
    else:
        data_type = self.p.getstr('config',
                                  ti.ob_type + '_NATIVE_DATA_TYPE')
        if data_type == "NETCDF":
            fcst_field += "{ name=\""+ti.fcst_var+"_"+accum + \
                          "\"; level=\"(*,*)\"; cat_thresh=["
        else:
            fcst_field += "{ name=\""+ti.fcst_var + \
                          "\"; level=\"[A"+accum.zfill(2)+"]\"; cat_thresh=["
        for fcst_thresh in fcst_threshs:
            fcst_field += "gt" + str(fcst_thresh) + ", "
        # trim trailing ", " before closing the list
        fcst_field = fcst_field[0:-2]
        fcst_field += " ]; },"
        obs_field += "{ name=\"" + obs_var+"_" + accum + \
                     "\"; level=\"(*,*)\"; cat_thresh=[ "
        for obs_thresh in obs_threshs:
            obs_field += "gt" + str(obs_thresh) + ", "
        obs_field = obs_field[0:-2]
        obs_field += " ]; },"

    # remove last comma
    fcst_field = fcst_field[0:-1]
    obs_field = obs_field[0:-1]

    self.add_env_var("MODEL", model_type)
    self.add_env_var("FCST_VAR", ti.fcst_var)
    self.add_env_var("OBS_VAR", obs_var)
    self.add_env_var("ACCUM", accum)
    self.add_env_var("OBTYPE", ti.ob_type)
    self.add_env_var("CONFIG_DIR", config_dir)
    self.add_env_var("FCST_FIELD", fcst_field)
    self.add_env_var("OBS_FIELD", obs_field)

    self.logger.debug("")
    self.logger.debug("ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_item("MODEL")
    self.print_env_item("FCST_VAR")
    self.print_env_item("OBS_VAR")
    self.print_env_item("ACCUM")
    self.print_env_item("OBTYPE")
    self.print_env_item("CONFIG_DIR")
    self.print_env_item("FCST_FIELD")
    self.print_env_item("OBS_FIELD")
    self.logger.debug("")
    self.logger.debug("COPYABLE ENVIRONMENT FOR NEXT COMMAND: ")
    self.print_env_copy([
        "MODEL", "FCST_VAR", "OBS_VAR", "ACCUM", "OBTYPE", "CONFIG_DIR",
        "FCST_FIELD", "OBS_FIELD"
    ])
    self.logger.debug("")
    cmd = self.get_command()
    if cmd is None:
        self.logger.error("grid_stat (observation) could not "
                          "generate command")
        return
    self.logger.info("RUNNING: " + str(cmd))
    self.logger.info("")
    self.build()
    self.clear()
def setup_subtract_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to subtract two files to build desired
        accumulation
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param rl data type (FCST or OBS) to process
          @rtype string
          @return pcp_combine command to run, or None if either input
           file could not be found"""
    self.clear()
    in_dir, in_template = self.get_dir_and_template(rl, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(rl, 'OUTPUT')

    if rl == 'FCST':
        accum = var_info['fcst_level']
    else:
        accum = var_info['obs_level']

    # strip a leading level-type letter (e.g. 'A03' -> '03') if present;
    # guard against an empty level string to avoid an IndexError
    if accum and accum[0].isalpha():
        accum = accum[1:]
    lead = time_info['lead_hours']
    # second input is the file valid int(accum) hours earlier
    lead2 = lead - int(accum)

    self.set_method("SUBTRACT")

    # find the file at the full lead time
    pcpSts1 = sts.StringSub(self.logger, in_template,
                            level=(int(accum) * 3600),
                            **time_info)
    file1 = os.path.join(in_dir, pcpSts1.do_string_sub())
    file1 = util.preprocess_file(file1,
                                 self.c_dict[rl + '_INPUT_DATATYPE'],
                                 self.config)
    if file1 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info['init_fmt'], lead))
        return None

    # set time info for second lead
    input_dict2 = {'init': time_info['init'], 'lead_hours': lead2}
    time_info2 = time_util.ti_calculate(input_dict2)
    pcpSts2 = sts.StringSub(self.logger, in_template,
                            level=(int(accum) * 3600),
                            **time_info2)
    file2 = os.path.join(in_dir, pcpSts2.do_string_sub())
    file2 = util.preprocess_file(file2,
                                 self.c_dict[rl + '_INPUT_DATATYPE'],
                                 self.config)
    if file2 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info2['init_fmt'], lead2))
        return None

    self.add_input_file(file1, lead)
    self.add_input_file(file2, lead2)

    outSts = sts.StringSub(self.logger, out_template,
                           level=(int(accum) * 3600),
                           **time_info)
    out_file = outSts.do_string_sub()
    self.outfile = out_file
    self.outdir = out_dir

    return self.get_command()
def find_data(self, time_info, var_info, data_type):
    """! Finds the data file to compare
        Args:
          @param time_info dictionary containing timing information
          @param var_info object containing variable information
          @param data_type type of data to find (FCST or OBS)
          @rtype string
          @return path to a file (exact-match or single-window match),
           a list of paths if multiple files are allowed and found
           within the window, or None if no file was found
    """

    def to_epoch(time_str):
        # convert a YYYYMMDDHHMM string to unix seconds
        # NOTE: replaces strftime("%s"), a non-portable glibc extension
        # that is not a documented Python format code; timestamp()
        # interprets naive datetimes as local time, matching the old
        # behavior on platforms where "%s" worked
        return int(datetime.strptime(time_str, "%Y%m%d%H%M").timestamp())

    # get time info
    valid_time = time_info['valid_fmt']

    if var_info is not None:
        # set level based on input data type
        if data_type.startswith("OBS"):
            v_level = var_info['obs_level']
        else:
            v_level = var_info['fcst_level']

        # separate character from beginning of numeric level value
        # if applicable
        level = util.split_level(v_level)[1]

        # set level to 0 character if it is not a number
        if not level.isdigit():
            level = '0'
    else:
        level = '0'

    template = self.c_dict[data_type + '_INPUT_TEMPLATE']
    data_dir = self.c_dict[data_type + '_INPUT_DIR']

    # if looking for a file with an exact time match:
    if self.c_dict[data_type + '_FILE_WINDOW_BEGIN'] == 0 and \
       self.c_dict[data_type + '_FILE_WINDOW_END'] == 0:
        # perform string substitution
        dsts = sts.StringSub(self.logger, template,
                             level=(int(level.split('-')[0]) * 3600),
                             **time_info)
        filename = dsts.do_string_sub()

        # build full path with data directory and filename
        path = os.path.join(data_dir, filename)

        # check if desired data file exists and if it needs to be
        # preprocessed
        path = util.preprocess_file(
            path, self.c_dict[data_type + '_INPUT_DATATYPE'], self.config)
        return path

    # if looking for a file within a time window:
    # convert valid_time to unix time
    valid_seconds = to_epoch(valid_time)

    # get time of each file, compare to valid time, save best within range
    closest_files = []
    closest_time = 9999999

    # get range of times that will be considered
    valid_range_lower = self.c_dict[data_type + '_FILE_WINDOW_BEGIN']
    valid_range_upper = self.c_dict[data_type + '_FILE_WINDOW_END']
    lower_limit = to_epoch(util.shift_time_seconds(valid_time,
                                                   valid_range_lower))
    upper_limit = to_epoch(util.shift_time_seconds(valid_time,
                                                   valid_range_upper))

    # step through all files under input directory in sorted order
    # pylint:disable=unused-variable
    # os.walk returns a tuple. Not all returned values are needed.
    for dirpath, dirnames, all_files in os.walk(data_dir):
        for filename in sorted(all_files):
            fullpath = os.path.join(dirpath, filename)

            # remove input data directory to get relative path
            rel_path = fullpath.replace(data_dir + "/", "")

            # extract time information from relative path using template
            file_time_info = util.get_time_from_file(self.logger, rel_path,
                                                     template)
            if file_time_info is not None:
                # get valid time and check if it is within the time range
                file_valid_time = \
                    file_time_info['valid'].strftime("%Y%m%d%H%M")
                # skip if could not extract valid time
                if file_valid_time == '':
                    continue
                file_valid_dt = datetime.strptime(file_valid_time,
                                                  "%Y%m%d%H%M")
                file_valid_seconds = int(file_valid_dt.timestamp())

                # skip if outside time range
                if file_valid_seconds < lower_limit or \
                   file_valid_seconds > upper_limit:
                    continue

                # if only 1 file is allowed, check if file is
                # closer to desired valid time than previous match
                if not self.c_dict['ALLOW_MULTIPLE_FILES']:
                    diff = abs(valid_seconds - file_valid_seconds)
                    if diff < closest_time:
                        closest_time = diff
                        del closest_files[:]
                        closest_files.append(fullpath)
                # if multiple files are allowed, get all files within range
                else:
                    closest_files.append(fullpath)

    if not closest_files:
        return None

    # check if file(s) needs to be preprocessed before returning the path
    # return single file path if 1 file was found
    if len(closest_files) == 1:
        return util.preprocess_file(
            closest_files[0], self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)

    # return list if multiple files are found
    out = []
    for close_file in closest_files:
        outfile = util.preprocess_file(
            close_file, self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)
        out.append(outfile)
    return out