def getLowestForecastFile(self, valid_time, dtype, template):
    """!Find the file with the lowest forecast hour that matches a valid time

    Args:
        @param valid_time valid time to search
        @param dtype data type (FCST or OBS) used to look up config values
        @param template filename template to fill with time information
        @rtype string
        @return Path to file with the lowest forecast hour, or None if no
            file was found within the [MIN_FORECAST, MAX_FORECAST] range
    """
    # start at the minimum forecast hour and increment until a valid
    # file is found or the maximum forecast hour is exceeded
    min_forecast = self.c_dict[dtype + '_MIN_FORECAST']
    max_forecast = self.c_dict[dtype + '_MAX_FORECAST']
    forecast_lead = min_forecast
    while forecast_lead <= max_forecast:
        input_dict = {}
        input_dict['valid'] = valid_time
        input_dict['lead_hours'] = forecast_lead
        time_info = time_util.ti_calculate(input_dict)
        fSts = sts.StringSub(self.logger, template, **time_info)
        search_file = os.path.join(self.input_dir,
                                   fSts.do_string_sub())
        # preprocess_file returns None if the file does not exist
        search_file = util.preprocess_file(
            search_file, self.c_dict[dtype + '_INPUT_DATATYPE'],
            self.config)
        if search_file is not None:
            return search_file
        forecast_lead += 1
    return None
def test_preprocess_file_unzipped():
    """!preprocess_file should return the path to an uncompressed
        file unchanged."""
    conf = ConfigWrapper(metplus_config(), None)
    default_stage = os.path.join(conf.getdir('OUTPUT_BASE'), "stage")
    stage_dir = conf.getdir('STAGING_DIR', default_stage)
    filepath = conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile4.txt"
    outpath = util.preprocess_file(filepath, None, conf)
    # path is returned as-is and the file exists on disk
    assert outpath == filepath and os.path.exists(outpath)
def test_preprocess_file_zip():
    """!preprocess_file should extract a .zip input into the
        staging directory and return the staged path."""
    conf = metplus_config()
    default_stage = os.path.join(conf.getdir('OUTPUT_BASE'), "stage")
    stage_dir = conf.getdir('STAGING_DIR', default_stage)
    filepath = conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile3.txt.zip"
    # staged copy mirrors the original path under the staging dir
    stagepath = stage_dir + conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile3.txt"
    outpath = util.preprocess_file(filepath, None, conf)
    assert outpath == stagepath and os.path.exists(outpath)
def find_input_file(self, in_template, search_time, search_accum, data_src):
    """!Build the full path to an input file from a filename template and
        preprocess it (uncompress/stage) if needed.

    Args:
        @param in_template filename template to fill in
        @param search_time valid time used to fill the template
        @param search_accum accumulation in hours (converted to seconds
            for the level template tag)
        @param data_src data type (FCST or OBS) used to look up the
            input datatype from the config dictionary
        @return path to the preprocessed file, or None if it does not exist
    """
    string_sub = sts.StringSub(self.logger,
                               in_template,
                               valid=search_time,
                               level=int(search_accum) * 3600)
    full_path = os.path.join(self.input_dir, string_sub.do_string_sub())
    data_type = self.c_dict[data_src + '_INPUT_DATATYPE']
    return util.preprocess_file(full_path, data_type, self.config)
def get_daily_file(self, time_info, accum, data_src, file_template):
    """!Pull accumulation out of file that contains a full day of data

    Args:
        @param time_info dictionary containing timing information
        @param accum accumulation to extract from file
        @param data_src type of data (FCST or OBS)
        @param file_template filename template to search
    @rtype bool
    @return True if file was added to output list, False if not
    """
    data_interval = self.c_dict[data_src + '_DATA_INTERVAL']
    times_per_file = self.c_dict[data_src + '_TIMES_PER_FILE']
    search_file = None
    # loop from valid_time back through data interval * times per file
    # until an existing file is found
    for i in range(0, times_per_file + 1):
        search_time = time_info['valid'] - datetime.timedelta(
            hours=(i * data_interval))
        # check if file exists
        dSts = sts.StringSub(self.logger,
                             file_template,
                             valid=search_time)
        search_file = os.path.join(self.input_dir,
                                   dSts.do_string_sub())
        search_file = util.preprocess_file(
            search_file,
            self.c_dict[data_src + '_INPUT_DATATYPE'],
            self.config)
        if search_file is not None:
            break

    if search_file is None:
        return False

    diff = time_info['valid'] - search_time

    # index of the desired accumulation within the daily file.
    # Specifying integer division // Python 3,
    # assuming that was the intent in Python 2.
    lead = int((diff.days * 24) // (data_interval))
    lead += int((diff).seconds // (data_interval * 3600)) - 1

    # calling config.conf version of getter so default value is not
    # set in log and final conf because it is unnecessary
    fname = self.config.conf.getstr(
        'config',
        data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME', '')
    if fname == '':
        self.logger.error(
            'NetCDF field name was not set in config: {}'.format(
                data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME'))
        return False

    addon = "'name=\"" + fname + "\"; level=\"(" + \
            str(lead) + ",*,*)\";'"
    self.add_input_file(search_file, addon)
    return True
def setup_subtract_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to subtract two files to build desired
        accumulation

    Args:
        @param time_info object containing timing information
        @param var_info object containing variable information
        @param rl data type (FCST or OBS)
        @rtype string
        @return path to output file, or None if an input file is missing
    """
    self.clear()
    in_dir, in_template = self.get_dir_and_template(rl, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(rl, 'OUTPUT')

    # accumulation comes from the level of the matching data type
    accum = var_info['fcst_level'] if rl == 'FCST' else var_info['obs_level']

    # drop the leading level-type character (e.g. 'A06' -> '06') if present
    if accum[0].isalpha():
        accum = accum[1:]

    lead = time_info['lead_hours']
    lead2 = lead - int(accum)

    self.set_method("SUBTRACT")

    # locate the file at the full lead time
    pcpSts1 = sts.StringSub(self.logger,
                            in_template,
                            level=int(accum) * 3600,
                            **time_info)
    file1 = util.preprocess_file(
        os.path.join(in_dir, pcpSts1.do_string_sub()),
        self.c_dict[rl + '_INPUT_DATATYPE'],
        self.config)

    if file1 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info['init_fmt'], lead))
        return None

    # locate the file at the earlier lead time with the same init time
    time_info2 = time_util.ti_calculate(
        {'init': time_info['init'], 'lead_hours': lead2})
    pcpSts2 = sts.StringSub(self.logger,
                            in_template,
                            level=int(accum) * 3600,
                            **time_info2)
    file2 = util.preprocess_file(
        os.path.join(in_dir, pcpSts2.do_string_sub()),
        self.c_dict[rl + '_INPUT_DATATYPE'],
        self.config)

    if file2 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info2['init_fmt'], lead2))
        return None

    # later accumulation minus earlier accumulation
    self.add_input_file(file1, lead)
    self.add_input_file(file2, lead2)

    outSts = sts.StringSub(self.logger,
                           out_template,
                           level=int(accum) * 3600,
                           **time_info)
    self.outfile = outSts.do_string_sub()
    self.outdir = out_dir

    return self.get_command()
def find_data(self, time_info, var_info, data_type):
    """! Finds the data file to compare
        Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information, or
                None to use level 0
            @param data_type type of data to find (FCST or OBS)
            @rtype string
            @return Returns the path to an observation file (a single
                path, a list of paths when multiple files are allowed,
                or None if nothing was found in the time window)
    """
    # get time info
    valid_time = time_info['valid_fmt']

    if var_info is not None:
        # set level based on input data type
        if data_type.startswith("OBS"):
            v_level = var_info['obs_level']
        else:
            v_level = var_info['fcst_level']

        # separate character from beginning of numeric level value if applicable
        level = util.split_level(v_level)[1]

        # set level to 0 character if it is not a number
        if not level.isdigit():
            level = '0'
    else:
        level = '0'

    template = self.c_dict[data_type + '_INPUT_TEMPLATE']
    data_dir = self.c_dict[data_type + '_INPUT_DIR']

    # if looking for a file with an exact time match:
    if self.c_dict[data_type + '_FILE_WINDOW_BEGIN'] == 0 and \
            self.c_dict[data_type + '_FILE_WINDOW_END'] == 0:
        # perform string substitution
        # (only the first part of a 'N-M' level range is used)
        dsts = sts.StringSub(self.logger,
                             template,
                             level=(int(level.split('-')[0]) * 3600),
                             **time_info)
        filename = dsts.do_string_sub()

        # build full path with data directory and filename
        path = os.path.join(data_dir, filename)

        # check if desired data file exists and if it needs to be preprocessed
        path = util.preprocess_file(
            path, self.c_dict[data_type + '_INPUT_DATATYPE'], self.config)
        return path

    # if looking for a file within a time window:
    # convert valid_time to unix time
    # NOTE(review): strftime("%s") is a platform-specific (glibc)
    # extension that is not part of the C standard -- confirm this
    # code only runs on platforms that support it
    valid_seconds = int(
        datetime.strptime(valid_time, "%Y%m%d%H%M").strftime("%s"))
    # get time of each file, compare to valid time, save best within range
    closest_files = []
    closest_time = 9999999

    # get range of times that will be considered
    valid_range_lower = self.c_dict[data_type + '_FILE_WINDOW_BEGIN']
    valid_range_upper = self.c_dict[data_type + '_FILE_WINDOW_END']
    lower_limit = int(
        datetime.strptime(
            util.shift_time_seconds(valid_time, valid_range_lower),
            "%Y%m%d%H%M").strftime("%s"))
    upper_limit = int(
        datetime.strptime(
            util.shift_time_seconds(valid_time, valid_range_upper),
            "%Y%m%d%H%M").strftime("%s"))

    # step through all files under input directory in sorted order
    # pylint:disable=unused-variable
    # os.walk returns a tuple. Not all returned values are needed.
    for dirpath, dirnames, all_files in os.walk(data_dir):
        for filename in sorted(all_files):
            fullpath = os.path.join(dirpath, filename)

            # remove input data directory to get relative path
            rel_path = fullpath.replace(data_dir + "/", "")

            # extract time information from relative path using template
            file_time_info = util.get_time_from_file(
                self.logger, rel_path, template)
            if file_time_info is not None:
                # get valid time and check if it is within the time range
                file_valid_time = file_time_info['valid'].strftime(
                    "%Y%m%d%H%M")
                # skip if could not extract valid time
                if file_valid_time == '':
                    continue
                file_valid_dt = datetime.strptime(file_valid_time,
                                                  "%Y%m%d%H%M")
                file_valid_seconds = int(file_valid_dt.strftime("%s"))
                # skip if outside time range
                if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit:
                    continue

                # if only 1 file is allowed, check if file is
                # closer to desired valid time than previous match
                if not self.c_dict['ALLOW_MULTIPLE_FILES']:
                    diff = abs(valid_seconds - file_valid_seconds)
                    if diff < closest_time:
                        closest_time = diff
                        # keep only the new best match
                        del closest_files[:]
                        closest_files.append(fullpath)
                # if multiple files are allowed, get all files within range
                else:
                    closest_files.append(fullpath)

    if not closest_files:
        return None

    # check if file(s) needs to be preprocessed before returning the path
    # return single file path if 1 file was found
    if len(closest_files) == 1:
        return util.preprocess_file(
            closest_files[0],
            self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)

    # return list if multiple files are found
    out = []
    for close_file in closest_files:
        outfile = util.preprocess_file(
            close_file, self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)
        out.append(outfile)

    return out
def test_preprocess_file_none():
    """!Passing None as the filename should yield None."""
    conf = ConfigWrapper(metplus_config(), None)
    result = util.preprocess_file(None, None, conf)
    assert result is None
def find_model_members(self, time_info):
    """! Finds the model member files to compare
        Args:
            @param time_info dictionary containing timing information
            @rtype string
            @return Returns the path of the written file list of ensemble
                model files, or False if more members were found than
                expected
    """
    model_dir = self.c_dict['FCST_INPUT_DIR']
    # used for filling in missing files to ensure ens_thresh check is accurate
    fake_dir = '/ensemble/member/is/missing'

    # model_template is a list of 1 or more.
    ens_members_path = []

    # get all files that exist
    for ens_member_template in self.c_dict['FCST_INPUT_TEMPLATE']:
        model_ss = sts.StringSub(self.logger, ens_member_template,
                                 **time_info)
        member_file = model_ss.do_string_sub()
        expected_path = os.path.join(model_dir, member_file)

        # if wildcard expression, get all files that match
        if '?' in expected_path:
            wildcard_files = sorted(glob.glob(expected_path))
            self.logger.debug(
                'Ensemble members file pattern: {}'.format(expected_path))
            self.logger.debug('{} members match file pattern'.format(
                str(len(wildcard_files))))

            # add files to list of ensemble members
            for wildcard_file in wildcard_files:
                ens_members_path.append(wildcard_file)
        else:
            # otherwise check if file exists
            expected_path = util.preprocess_file(
                expected_path, self.c_dict['FCST_INPUT_DATATYPE'],
                self.config)

            # if the file exists, add it to the list
            if expected_path is not None:
                ens_members_path.append(expected_path)
            else:
                # add relative path to fake dir and add to list so the
                # member count stays correct for the ens_thresh check
                ens_members_path.append(os.path.join(fake_dir,
                                                     member_file))
                self.logger.warning(
                    'Expected ensemble file {} not found'.format(
                        member_file))

    # if more files found than expected, error and exit
    if len(ens_members_path) > self.c_dict['N_MEMBERS']:
        msg = 'Found more files than expected! ' +\
              'Found {} expected {}. '.format(len(ens_members_path),
                                              self.c_dict['N_MEMBERS']) +\
              'Adjust wildcard expression in [filename_templates] ' +\
              'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE or adjust [config] ' +\
              'ENSEMBLE_STAT_N_MEMBERS. ' +\
              'Files found: {}'.format(ens_members_path)
        self.logger.error(msg)
        self.logger.error(
            "Could not find files in {} for init {} f{} ".format(
                model_dir, time_info['init_fmt'],
                str(time_info['lead_hours'])))
        return False
    # if fewer files found than expected, warn and add fake files
    elif len(ens_members_path) < self.c_dict['N_MEMBERS']:
        msg = 'Found fewer files than expected. ' +\
              'Found {} expected {}.'.format(len(ens_members_path),
                                             self.c_dict['N_MEMBERS'])
        self.logger.warning(msg)
        # add fake files to list to get correct number of files for ens_thresh
        diff = self.c_dict['N_MEMBERS'] - len(ens_members_path)
        self.logger.warning('Adding {} fake files to '.format(str(diff)) +
                            'ensure ens_thresh check is accurate')
        for _ in range(0, diff, 1):
            ens_members_path.append(fake_dir)

    # write file that contains list of ensemble files
    list_filename = time_info['init_fmt'] + '_' + \
        str(time_info['lead_hours']) + '_ensemble.txt'
    return self.write_list_file(list_filename, ens_members_path)
def run_at_time_once(self, time_info, var_info, dtype):
    """! Runs the MET application for a given time and forecast lead
        combination
        Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param dtype data type (FCST or OBS)
            @return True if the app ran or output was skipped, False on
                error; falls through after build() on the success path
                (NOTE(review): implicit None return there — confirm
                callers only test truthiness)
    """
    self.clear()

    # pull the variable name and level for the requested data type
    if dtype == "FCST":
        compare_var = var_info['fcst_name']
        level = var_info['fcst_level']
    else:
        compare_var = var_info['obs_name']
        level = var_info['obs_level']

    # strip the level type character (e.g. 'A03' -> '03')
    level = util.split_level(level)[1]

    # all four dir/template config values must be set - exit if any is empty
    for suffix in ('_INPUT_DIR', '_INPUT_TEMPLATE',
                   '_OUTPUT_DIR', '_OUTPUT_TEMPLATE'):
        if self.c_dict[dtype + suffix] == '':
            self.logger.error(
                'Must set {}_REGRID_DATA_PLANE{}'.format(dtype, suffix) +
                ' in config file')
            exit(1)

    input_dir = self.c_dict[dtype + '_INPUT_DIR']
    input_template = self.c_dict[dtype + '_INPUT_TEMPLATE']
    output_dir = self.c_dict[dtype + '_OUTPUT_DIR']
    output_template = self.c_dict[dtype + '_OUTPUT_TEMPLATE']

    # use 0 in filename templates when the level is not numeric
    if not level.isdigit():
        f_level = '0'
    else:
        f_level = level

    string_sub = sts.StringSub(self.logger,
                               input_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    infile = os.path.join(input_dir, string_sub.do_string_sub())
    infile = util.preprocess_file(
        infile,
        self.config.getstr('config',
                           dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                           ''),
        self.config)
    if infile is not None:
        self.infiles.append(infile)
    else:
        self.logger.error(
            'Could not find input file in {} matching template {}'.format(
                input_dir, input_template))
        return False

    verif_grid = self.c_dict['VERIFICATION_GRID']
    if verif_grid == '':
        self.logger.error('No verification grid specified! ' +
                          'Set REGRID_DATA_PLANE_VERIF_GRID')
        return False

    self.infiles.append(verif_grid)

    string_sub = sts.StringSub(self.logger,
                               output_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    outfile = string_sub.do_string_sub()
    self.set_output_path(os.path.join(output_dir, outfile))

    outpath = self.get_output_path()
    if os.path.exists(outpath) and \
            self.c_dict['SKIP_IF_OUTPUT_EXISTS'] is True:
        # bugfix: the old message told users to set the skip flag to
        # True to process, but skipping happens BECAUSE it is True
        self.logger.debug(
            'Skip writing output file {} because it already '
            'exists. Remove file or change '
            'REGRID_DATA_PLANE_SKIP_IF_OUTPUT_EXISTS to False to process'.
            format(outpath))
        return True

    # NetCDF (or unset) input datatype encodes the level in the field name
    if self.config.getstr('config',
                          dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                          '') in ['', 'NETCDF']:
        field_name = "{:s}_{:s}".format(compare_var, str(level).zfill(2))
        self.args.append(
            "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    else:
        field_name = "{:s}".format(compare_var)
        self.args.append("-field 'name=\"{:s}\"; level=\"{:s}\";'".format(
            field_name, level))

    if self.c_dict['METHOD'] != '':
        self.args.append("-method {}".format(self.c_dict['METHOD']))

    self.args.append("-width {}".format(self.c_dict['WIDTH']))

    self.args.append("-name " + field_name)

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return

    self.build()